3

See code below. Pocketsphinx is configured with a keyphrase search to trigger on the word "record". Searching is then started, and talking causes onBeginningOfSpeech and onEndOfSpeech to be called, but no other listener methods get called, whatever I say.

/**
 * Start screen that opens {@code RecordingActivity} either from the start button or hands-free,
 * when PocketSphinx spots the spoken keyphrase {@code "record"}.
 *
 * <p>The recognizer is created on a background thread (it copies assets and loads models) and is
 * only started once the RECORD_AUDIO permission has been granted. All listener callbacks and
 * lifecycle methods run on the main thread, so the plain boolean flags below need no locking.
 */
public class MainActivity extends AppCompatActivity implements RecognitionListener {
    private static final String TAG = "MainActivity";

    /**
     * Keyphrase to spot; also reused as the name of the PocketSphinx search.
     *
     * <p>NOTE(review): the dictionary may list several pronunciations for one spelling. If
     * detection is unreliable, check which pronunciation cmudict-en-us.dict assigns to this
     * entry before tuning anything else.
     */
    private static final String KEYWORD = "record";

    /**
     * Detection threshold for the keyphrase search. Without an explicit threshold the keyphrase
     * is practically never reported. This value is only a starting point and has to be tuned on
     * real recordings to balance false alarms against misses.
     */
    private static final float KEYWORD_THRESHOLD = 1e-20f;

    /** Request code used for the RECORD_AUDIO runtime permission dialog. */
    private static final int REQUEST_RECORD_AUDIO = 1;

    private SpeechRecognizer recognizer;

    /** True once the current keyword search has fired, so one utterance launches only once. */
    private boolean keywordHandled;

    /** True after {@link #onDestroy()}; a late-finishing initialization must not start listening. */
    private boolean destroyed;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_start);
        makeButtonStartButton();

        // Start listening only if the permission is already held. Otherwise
        // onRequestPermissionsResult starts it, so the recognizer is never created twice.
        if (ensureRecordAudioPermission()) {
            startKeywordListener();
        }
    }

    @Override
    protected void onResume() {
        super.onResume();
        // The search is stopped when the keyword fires; re-arm it when the user comes back.
        if (recognizer != null && keywordHandled) {
            startKeywordSearch();
        }
    }

    /** Creates the recognizer off the main thread and starts the keyword search when ready. */
    private void startKeywordListener() {
        // Recognizer initialization is time-consuming and involves I/O,
        // so it is executed in an async task.
        new AsyncTask<Void, Void, Exception>() {
            @Override
            protected Exception doInBackground(Void... params) {
                try {
                    Assets assets = new Assets(MainActivity.this);
                    File assetDir = assets.syncAssets();
                    setupRecognizer(assetDir);
                } catch (IOException e) {
                    return e;
                }
                return null;
            }

            @Override
            protected void onPostExecute(Exception result) {
                if (result != null) {
                    // Pass the exception itself so the stack trace is logged.
                    Log.e(TAG, "Failed to init recognizer", result);
                } else if (destroyed) {
                    // The activity went away while we were initializing; release the recognizer.
                    shutdownRecognizer();
                } else {
                    startKeywordSearch();
                }
            }
        }.execute();
    }

    /** (Re)starts listening for {@link #KEYWORD} and re-arms the single-launch guard. */
    private void startKeywordSearch() {
        Log.i(TAG, "Starting keyword search: " + KEYWORD);
        keywordHandled = false;
        recognizer.stop();
        recognizer.startListening(KEYWORD);
    }

    /**
     * Builds the recognizer from the synced assets and registers the keyphrase search.
     *
     * @param assetsDir directory returned by {@code Assets.syncAssets()}
     * @throws IOException if the recognizer cannot be created from the given files
     */
    private void setupRecognizer(File assetsDir) throws IOException {
        // The recognizer can be configured to perform multiple searches
        // of different kinds and switch between them.
        recognizer = SpeechRecognizerSetup.defaultSetup()
                .setAcousticModel(new File(assetsDir, "en-us-ptm"))
                .setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
                .setKeywordThreshold(KEYWORD_THRESHOLD)

                // To disable logging of raw audio remove this call
                // (it takes a lot of space on the device).
                .setRawLogDir(assetsDir)

                .getRecognizer();
        recognizer.addListener(this);

        // Create keyword-activation search.
        recognizer.addKeyphraseSearch(KEYWORD, KEYWORD);
    }

    /**
     * Checks the RECORD_AUDIO permission and requests it if it is missing.
     *
     * @return {@code true} if the permission is already granted; {@code false} if a request was
     *     issued and the outcome will arrive in {@link #onRequestPermissionsResult}
     */
    private boolean ensureRecordAudioPermission() {
        int permissionCheck = ContextCompat.checkSelfPermission(
                getApplicationContext(), Manifest.permission.RECORD_AUDIO);
        if (permissionCheck == PackageManager.PERMISSION_GRANTED) {
            return true;
        }
        ActivityCompat.requestPermissions(
                this, new String[]{Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO);
        return false;
    }

    @Override
    public void onRequestPermissionsResult(int requestCode,
                                           String[] permissions, int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);

        if (requestCode == REQUEST_RECORD_AUDIO) {
            if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
                startKeywordListener();
            } else {
                // Without the microphone the activity closes itself.
                finish();
            }
        }
    }

    @Override
    public void onDestroy() {
        super.onDestroy();
        destroyed = true;
        shutdownRecognizer();
    }

    /** Cancels any running search and releases the recognizer, if one exists. */
    private void shutdownRecognizer() {
        if (recognizer != null) {
            recognizer.cancel();
            recognizer.shutdown();
            recognizer = null;
        }
    }

    private void makeButtonStartButton() {
        findViewById(R.id.startButton).setOnClickListener(startRecording);
    }

    /** Opens the recording screen; shared by the start button and the keyword trigger. */
    private final View.OnClickListener startRecording = new View.OnClickListener() {
        @Override
        public void onClick(View v) {
            startActivity(new Intent(MainActivity.this, RecordingActivity.class));
        }
    };

    /**
     * Launches the recording screen the first time the hypothesis equals {@link #KEYWORD}.
     *
     * <p>Partial results repeat while the recognizer keeps listening, and stopping the recognizer
     * delivers a final result as well, so the guard flag ensures a single launch per detection.
     *
     * @param hypothesis non-null hypothesis reported by the recognizer
     */
    private void handleKeywordHypothesis(Hypothesis hypothesis) {
        if (destroyed || keywordHandled || !KEYWORD.equals(hypothesis.getHypstr())) {
            return;
        }
        keywordHandled = true;
        // Stop listening before handing over; presumably RecordingActivity needs the microphone.
        if (recognizer != null) {
            recognizer.stop();
        }
        startRecording.onClick(null);
    }

    @Override
    public void onBeginningOfSpeech() {
        Log.i(TAG, "Beginning of speech detected");
    }

    @Override
    public void onEndOfSpeech() {
        Log.i(TAG, "End of speech detected");
    }

    @Override
    public void onPartialResult(Hypothesis hypothesis) {
        if (hypothesis == null) {
            return; // nothing recognized yet
        }
        Log.i(TAG, "Partial result: " + hypothesis.getHypstr() + " (" + hypothesis.getProb() + ")");
        handleKeywordHypothesis(hypothesis);
    }

    @Override
    public void onResult(Hypothesis hypothesis) {
        if (hypothesis == null) {
            return; // nothing recognized
        }
        Log.i(TAG, "Complete result: " + hypothesis.getHypstr() + " (" + hypothesis.getProb() + ")");
        handleKeywordHypothesis(hypothesis);
    }

    @Override
    public void onError(Exception e) {
        Log.e(TAG, "Error detected", e);
    }

    @Override
    public void onTimeout() {
        Log.i(TAG, "Timeout occurred");
    }
}
Jules
  • 14,841
  • 9
  • 83
  • 130
  • You need to set keyword threshold – Nikolay Shmyrev Jan 25 '17 at 10:14
  • @NikolayShmyrev - do you have any guidance on what value I should be using? I've tried 1e-5f as suggested [here](http://stackoverflow.com/questions/3148603/continuous-speech-recognition-android) and 1e-40f (as suggested [here](http://stackoverflow.com/questions/24321893/voice-command-keyword-listener-in-android)) but in no case is triggering happening reliably (I've managed to get it to trigger 3 times in testing, with thousands of attempts so far). I've also tried Float.MIN_VALUE, and changing my keyword to a longer phrase, but still no luck. Any ideas? – Jules Jan 26 '17 at 02:10
  • 2
    Ah, never mind. Have figured out why it wasn't recognizing anything useful: "record" is mapped in the dictionary to a pronunciation that's substantially different to what I was saying. "record(3)" is what I was saying... – Jules Jan 26 '17 at 02:36
  • Yes, it could be pretty sensitive sometimes. – Nikolay Shmyrev Jan 26 '17 at 08:40
  • @Jules how did you discover that? I currently have the same problem with a different keyword (one with only a single entry in the dictionary; I also tried just "okay"), and every `Hypothesis` is null... – snachmsm Oct 21 '17 at 11:43
  • 1
    @snachmsm - you can look up the available pronunciations from the dictionary and work out what they should sound like using [this description of the phoneme set that the dictionary uses](https://en.wikipedia.org/wiki/ARPABET). – Jules Oct 22 '17 at 03:21
  • Awesome, thanks! :) Can I use some tool to record my voice and "transform" words into ARPABET? – snachmsm Oct 22 '17 at 09:02

0 Answers