
I'm currently using CameraX to record video with audio, and it's working pretty well. However, I now need to run SpeechRecognizer while the video is recording and get a transcription of what the user is saying.
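
For reference, the recording itself is set up roughly like this (a simplified sketch, not my exact code; outputFile, mainExecutor and the quality settings are placeholders):

    // Simplified CameraX video + audio recording sketch (placeholder names).
    val recorder = Recorder.Builder()
        .setQualitySelector(QualitySelector.from(Quality.HD))
        .build()
    val videoCapture = VideoCapture.withOutput(recorder)
    // ... cameraProvider.bindToLifecycle(viewLifecycleOwner, cameraSelector, preview, videoCapture) ...

    val outputOptions = FileOutputOptions.Builder(outputFile).build()
    val recording = videoCapture.output
        .prepareRecording(requireContext(), outputOptions)
        .withAudioEnabled() // audio is captured together with the video
        .start(mainExecutor) { event ->
            if (event is VideoRecordEvent.Finalize) {
                Log.i("CameraX", "video saved to ${event.outputResults.outputUri}")
            }
        }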

UPDATE:

I've tried extracting the audio from the video and feeding it to the intent as an extra, but that doesn't work. I then thought about getting the audio from the SpeechRecognizer intent directly, but that was a dead end since there is no mention of a URI or a temporary file anywhere in the docs. I'm now trying to launch a MediaRecorder when the SpeechRecognizer starts listening, but that only works on the first listen (see the sketch after the code below for the loop I'm aiming for).

I have basically tried everything under the sun and made zero headway. Does anyone have new ideas? Here's the relevant code:

    private fun doWhileRecording(state: UiState) {
        when (state) {
            UiState.IDLE -> {
            }
            UiState.RECORDING -> {
                tts!!.stop()
                startSpeechToText()
            }
            UiState.FINALIZED -> {
            }
        }
    }
    
    private fun startSpeechToText() {
        val speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this.requireContext())
        // standard free-form recognition request
        val speechRecognizerIntent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
        speechRecognizerIntent.putExtra(
            RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM
        )
        speechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault())

        speechRecognizer.setRecognitionListener(object : RecognitionListener {
            override fun onReadyForSpeech(bundle: Bundle?) {
                Log.i("STT", "ready")
            }
            override fun onBeginningOfSpeech() {
                Log.i("STT", "begin")
                // start my own MediaRecorder as soon as the recognizer hears speech
                startRecordingAudio()
            }
            override fun onRmsChanged(v: Float) {}
            override fun onBufferReceived(bytes: ByteArray?) {}
            override fun onEndOfSpeech() {}
            override fun onError(i: Int) {}

            override fun onResults(bundle: Bundle) {
                val result = bundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                if (result != null) {
                    // result[0] is the most likely transcription
                    Log.i("STT", "you said: ${result[0]}")
                    stopAudioRecording()
                } else {
                    Log.i("STT", "results null boo")
                }
            }
            override fun onPartialResults(bundle: Bundle) {}
            override fun onEvent(i: Int, bundle: Bundle?) {}
        })
        // start listening
        speechRecognizer.startListening(speechRecognizerIntent)
    }

    private fun startRecordingAudio() {
        // initialize the media recorder
        mRecorder = MediaRecorder()

        // record from the mic, tuned for voice recognition
        mRecorder.setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION)
        mRecorder.setOutputFormat(MediaRecorder.OutputFormat.THREE_GPP)
        mRecorder.setAudioEncoder(MediaRecorder.AudioEncoder.AMR_NB)
        mRecorder.setOutputFile(audioFileRecorded.path)

        try {
            mRecorder.prepare()
        } catch (e: IOException) {
            Log.e("TAG", "prepare() failed")
        }

        // start the audio recording
        mRecorder.start()
    }
    private fun stopAudioRecording() {
        mRecorder.stop()

        // release the media recorder
        mRecorder.release()
    }
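
The loop I'm aiming for is: each result stops and releases the current MediaRecorder, then immediately starts listening again, with onBeginningOfSpeech() creating a fresh recorder for the next pass. A rough sketch of that idea (not my exact code, since only the first pass ever records for me):

    // Sketch of the intended listen/record loop: after each result, stop the
    // current recorder and re-arm the recognizer so the next onBeginningOfSpeech()
    // can create a brand-new MediaRecorder (it can't be reused after release()).
    override fun onResults(bundle: Bundle) {
        val result = bundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
        Log.i("STT", "you said: ${result?.firstOrNull()}")
        stopAudioRecording() // stop() + release() the recorder for this pass
        // kick off the next cycle
        speechRecognizer.startListening(speechRecognizerIntent)
    }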