I am using android speech recognition and written some code to identify the spoken words. Please have a look at the below code.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.app.Activity;
import android.content.Intent;
import android.os.Bundle;
import android.speech.tts.TextToSpeech;
import android.text.Html;
import android.util.Log;
import android.view.View;
import android.widget.AdapterView;
import android.widget.ArrayAdapter;
import android.widget.CompoundButton;
import android.widget.CompoundButton.OnCheckedChangeListener;
import android.widget.ListView;
import android.widget.ProgressBar;
import android.widget.TextView;
import android.widget.Toast;
import android.widget.ToggleButton;
public class MainActivity extends Activity {
private TextView returnedText;
private ToggleButton toggleButton;
private ProgressBar progressBar;
private SpeechRecognizer speech = null;
private Intent recognizerIntent;
private ListView wordList;
private String LOG_TAG = "VoiceRecognitionActivity";
private List<String> previousInterim;
private diff_match_patch diff;
private String display = "test";
private List<String>adapterList = new ArrayList<String>();
ArrayAdapter<String> adapter;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
returnedText = (TextView) findViewById(R.id.textView1);
progressBar = (ProgressBar) findViewById(R.id.progressBar1);
toggleButton = (ToggleButton) findViewById(R.id.toggleButton1);
wordList = (ListView)findViewById(R.id.word_list);
adapter = new ArrayAdapter<String>(this,android.R.layout.simple_list_item_checked, adapterList);
adapter.setNotifyOnChange(true);
wordList.setAdapter(adapter);
progressBar.setVisibility(View.INVISIBLE);
// createRecog();
speech = SpeechRecognizer.createSpeechRecognizer(getApplicationContext());
toggleButton.setOnCheckedChangeListener(new ButtonListener());
}
private class ButtonListener implements OnCheckedChangeListener, Runnable
{
boolean isChecked;
@Override
public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {
this.isChecked = isChecked;
this.run();
}
@Override
public void run() {
if (isChecked) {
speech.stopListening();
speech.cancel();
speech.destroy();
createRecog();
progressBar.setVisibility(View.VISIBLE);
progressBar.setIndeterminate(true);
speech.startListening(recognizerIntent);
adapter.clear();
returnedText.setText("");
} else {
progressBar.setIndeterminate(false);
progressBar.setVisibility(View.INVISIBLE);
speech.stopListening();
}
}
}
private class RecognitionListenerClass implements RecognitionListener
{
@Override
public void onBeginningOfSpeech() {
Log.i(LOG_TAG, "onBeginningOfSpeech");
progressBar.setIndeterminate(false);
progressBar.setMax(10);
wordList.computeScroll();
}
@Override
public void onBufferReceived(byte[] buffer) {
Log.i(LOG_TAG, "onBufferReceived: " + buffer);
}
@Override
public void onEndOfSpeech() {
Log.i(LOG_TAG, "onEndOfSpeech");
progressBar.setIndeterminate(true);
toggleButton.setChecked(false);
}
@Override
public void onError(int errorCode) {
String errorMessage = getErrorText(errorCode);
Log.d(LOG_TAG, "FAILED " + errorMessage);
returnedText.setText(errorMessage);
toggleButton.setChecked(false);
// speech = null;
toggleButton.performClick();
}
@Override
public void onEvent(int arg0, Bundle arg1) {
Log.i(LOG_TAG, "onEvent");
}
@Override
public void onPartialResults(Bundle arg0) {
Log.i(LOG_TAG, "onPartialResults");
final ArrayList<String> matches = arg0.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
final float[] scores = arg0.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
receiveWhatWasHeard(matches, scores);
}
@Override
public void onReadyForSpeech(Bundle arg0) {
Log.i(LOG_TAG, "onReadyForSpeech");
}
@Override
public void onResults(Bundle results) {
}
@Override
public void onRmsChanged(float rmsdB) {
Log.i(LOG_TAG, "onRmsChanged: " + rmsdB);
progressBar.setProgress((int) rmsdB);
}
}
private void receiveWhatWasHeard(ArrayList<String> matches, float[] scores) {
Log.i(LOG_TAG, matches.get(0));
returnedText.setText(matches.get(0));
}
private void createRecog()
{
speech = SpeechRecognizer.createSpeechRecognizer(this);
speech.setRecognitionListener(new RecognitionListenerClass());
recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_PREFERENCE,
"en");
recognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
this.getPackageName());
recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH);
recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
recognizerIntent.putExtra("android.speech.extra.DICTATION_MODE", true);
recognizerIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
}
@Override
public void onResume() {
super.onResume();
}
@Override
protected void onPause() {
super.onPause();
}
public String getErrorText(int errorCode) {
String message;
switch (errorCode) {
case SpeechRecognizer.ERROR_AUDIO:
message = "Audio recording error";
break;
case SpeechRecognizer.ERROR_CLIENT:
message = "Client side error";
break;
case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
message = "Insufficient permissions";
break;
case SpeechRecognizer.ERROR_NETWORK:
message = "Network error";
break;
case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
message = "Network timeout";
break;
case SpeechRecognizer.ERROR_NO_MATCH:
message = "No match";
break;
case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
message = "RecognitionService busy";
break;
case SpeechRecognizer.ERROR_SERVER:
message = "error from server";
break;
case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
message = "No speech input";
break;
default:
message = "Didn't understand, please try again.";
break;
}
return message;
}
}
However I need to run this continusly, which I failed to do all the time. Restarting the recognition at the end of speech could be a good idea but it gives that small noise and it take 1-3 seconds to load, which means, some of the spoken words will be lost.
Yes I know the Google has mentioned their system is not for continuous recognition; but several people had done work-arounds before. Unfortunately these work-arounds seems not work by now as they are written in pretty old APIs. If it was possible with older versions, then of course it is much more possible in newer versions.
So, any ideas about how to achieve this continuous recognition task? I am using API 15.