3

So I have this voice recognition code that I have been working with utilizing Microsoft's Speech Recognition Engine.

Unfortunately, it is not that fantastic at understanding voice, so I have been thinking of ways around this. One of these is switching between specific grammars and general dictation grammars. However, I cannot figure out how to switch between grammars when one of them fails to recognize the given speech.

Can anyone help me figure out how to construct this, that is, the ability to switch from, let's say, my commandList grammar to DictationGrammar() whenever my commandList grammar cannot recognize the speech picked up?

Here is the code:

//using Microsoft.Speech.Recognition;

using System;
using System.Speech.Recognition;
using System.Windows.Forms;
using System.Collections.Generic;

namespace vRec
{
  /// <summary>
  /// Console voice-command listener built on System.Speech.
  /// The recognition handler runs a three-step dialogue: wait for a wake word,
  /// capture a command, then ask the user to confirm it.
  /// </summary>
  public class Form1
  {
      // Dialogue states held in `counter`.
      const int Idle = 0;                 // waiting for a wake word
      const int AwaitingCommand = 1;      // next utterance is taken as the command
      const int AwaitingConfirmation = 2; // next utterance is a yes/no answer

      static int counter = Idle;
      static bool stop = false;
      static string command = null;
      static List<String> commandList = new List<string>() { "zooey", "open", "quit", "search", "close", "yes", "no" };
      static Choices keywords = new Choices();

      // Lookup tables of the spellings the handler accepts for each intent.
      // Ordinal, case-insensitive matching avoids the culture-dependent results
      // of comparing ToUpper() output.
      static readonly HashSet<string> wakeWords = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
      {
          "ZOOEY", "ZOE", "EASILY", "SALLY", "ZONE", "ZONE WE", "SOLELY", "ZOELLICK"
      };

      static readonly HashSet<string> affirmatives = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
      {
          "YES", "US", "AS"
      };

      static readonly HashSet<string> negatives = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
      {
          "NO", "NOW", "KNOW", "OH", "NOT", "NOPE", "NAH"
      };

      // Maps each accepted spelling to the canonical command it stands for.
      static readonly Dictionary<string, string> commandAliases = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
      {
          { "ALL BEEN", "open" }, { "OPIUM", "open" }, { "OLD AND", "open" }, { "HOLE IN", "open" },
          { "HOPING", "open" }, { "OLD BEEN", "open" }, { "OPEN", "open" },

          { "WAIT", "quit" }, { "QUITE", "quit" }, { "QUIP", "quit" }, { "QUICK", "quit" },
          { "CLIP", "quit" }, { "QUIT", "quit" },

          { "SUCH", "search" }, { "SORT", "search" }, { "SEARCH", "search" },

          { "RULES", "close" }, { "FELLOWS", "close" }, { "CLOSE", "close" }
      };

      /// <summary>
      /// Entry point: resets the shared state, loads the keyword grammar and a
      /// dictation grammar into an en-US recognizer, starts continuous
      /// asynchronous recognition and then blocks on console input.
      /// </summary>
      public static void Main()
      {
          command = null;
          stop = false;

          // Create an in-process speech recognizer for the en-US locale.
          using (SpeechRecognitionEngine recognizer =
              new SpeechRecognitionEngine(new System.Globalization.CultureInfo("en-US")))
          {
              keywords.Add(commandList.ToArray());
              Grammar testGrammar = new Grammar(new GrammarBuilder(keywords));
              recognizer.LoadGrammar(testGrammar);
              recognizer.LoadGrammar(new DictationGrammar());

              // Add a handler for the speech recognized event.
              recognizer.SpeechRecognized +=
                  new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);

              // Configure input to the speech recognizer.
              recognizer.SetInputToDefaultAudioDevice();

              // Start asynchronous, continuous speech recognition.
              recognizer.RecognizeAsync(RecognizeMode.Multiple);

              Console.WriteLine("NOT TERMINATED");

              // Keep the console window open.
              if (!stop)
              {
                  Console.ReadLine();
              }
          }
      }

      /// <summary>
      /// Handles the SpeechRecognized event by advancing the dialogue exactly one
      /// step per utterance, then logging the raw recognized text.
      /// </summary>
      /// <param name="sender">Source of the event (unused).</param>
      /// <param name="e">Event data carrying the recognition result.</param>
      public static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
      {
          string heard = e.Result.Text;

          // One state is handled per utterance. Previously a rejected confirmation
          // fell straight through into command capture, so the "no" itself was
          // recorded as the next command.
          switch (counter)
          {
              case Idle:
                  // Wake words only count while idle. The old condition applied the
                  // counter check to "ZOELLICK" alone because && binds tighter than ||,
                  // which let the counter run past the last handled state.
                  if (wakeWords.Contains(heard))
                  {
                      counter = AwaitingCommand;
                      Console.WriteLine("How can I help you?");
                  }
                  break;

              case AwaitingCommand:
                  command = NormalizeCommand(heard);

                  // NOTE(review): this appends to commandList on every capture (duplicates
                  // included). The grammar was built in Main from a ToArray() copy, so the
                  // additions only affect the dump printed below.
                  commandList.Add(command);
                  Console.WriteLine(counter);
                  Console.WriteLine("Recognized text: " + command);
                  Console.WriteLine("Is this correct?");

                  foreach (string known in commandList)
                  {
                      Console.WriteLine("/" + known);
                  }

                  counter = AwaitingConfirmation;
                  break;

              case AwaitingConfirmation:
                  if (affirmatives.Contains(heard))
                  {
                      counter = Idle;
                      Console.WriteLine("THANK YOU LORD JEBUS");
                      stop = true;
                      // Sends an Enter keystroke to the active window; presumably meant to
                      // release the Console.ReadLine() in Main. TODO confirm.
                      SendKeys.SendWait("{ENTER}");
                      //command string to be passed in for functions in c++ code
                  }
                  else if (negatives.Contains(heard))
                  {
                      Console.WriteLine("Can you spell that?");
                      // Drop the rejected command so getCommand() cannot return it.
                      command = null;
                      counter = AwaitingCommand;
                  }
                  else
                  {
                      // Neither yes nor no: ask again and take the next utterance as the command.
                      Console.WriteLine("Can you repeat that?");
                      counter = AwaitingCommand;
                  }
                  break;
          }

          Console.WriteLine("Recognized text: " + heard);
      }

      /// <summary>
      /// Returns the most recently captured command, or null when none is pending.
      /// </summary>
      public static string getCommand()
      {
          return command;
      }

      // Maps a recognized phrase to its canonical command, or returns the phrase unchanged.
      static string NormalizeCommand(string heard)
      {
          string canonical;
          return commandAliases.TryGetValue(heard, out canonical) ? canonical : heard;
      }

  }
}

Any help would be appreciated. ^.^

Jaromando
  • 309
  • 2
  • 3
  • 9

1 Answers1

1

One way I can think of:

Set a confidence threshold (0.6 here, chosen arbitrarily) and switch the grammar inside your recognizer_SpeechRecognized method whenever the recognized speech falls below that threshold. The confidence of the recognized speech is available as `e.Result.Confidence`.

To switch grammars, use `SpeechRecognitionEngine.UnloadAllGrammars` in conjunction with `SpeechRecognitionEngine.LoadGrammarAsync`, so your code could look like:
    // Goes inside the SpeechRecognized handler: when the result's confidence is
    // below the threshold, swap the loaded grammars.
    if (e.Result.Confidence < 0.6)
    {
        // The recognizer is a local of Main in the question's code, so take it from
        // the event's sender instead (assumes the engine raises the event itself).
        SpeechRecognitionEngine recognizer = (SpeechRecognitionEngine)sender;

        recognizer.RequestRecognizerUpdate();
        recognizer.UnloadAllGrammars();
        // Load whichever grammar you are switching to; dictation is the fallback
        // the question asks about.
        recognizer.LoadGrammarAsync(new DictationGrammar());
    }

You will have to check that your grammars, etc. are accessible in this scope. Hope this helps!

Priyank
  • 1,513
  • 1
  • 18
  • 36