For a while now, I've been trying to get Google Speech Recognition to work in Xamarin (Android part). Unfortunately, so far I've had no success. Examples/snippits I'm using are Plugin.AudioRecorder, GrpcXamarinSamples and Google's Recognize sample.
The goal of this app is to get a streaming voice from a microphone on a Android platform, have Google process the audio, and return a string for further use in the project. The sample project works, but as soon as I try to port it to my own project, and replace NAudio.WaveIn with Plugin.AudioRecorder code, it fails to run properly. I've tried multiple ways of doing it, but none of them work, and we always get some sort of error. (right now a NullPointerException that I cannot place)
Currently, I'm stuck at the following class, where Listen() is the method that is being called when we want to have a single listen-recognize-return loop. When I call this function, in the Console I get the results below.
using System;
using System.Threading.Tasks;
using System.Reflection;
using System.IO;
using System.Threading;
using Android.App;
using Google.Apis.Auth.OAuth2;
using Google.Cloud.Speech.V1;
using Grpc.Core;
using Grpc.Auth;
using Xamarin.Forms;
using MyApp.Droid;
using Plugin.AudioRecorder;
[assembly: Dependency(typeof(GoogleSpeechRecognition))]
namespace MyApp.Droid
{
[Activity(Label = "@string/app_name", Theme = "@style/AppTheme", MainLauncher = true)]
class GoogleSpeechRecognition : ISpeechRecognition
{
Editor logEditor;
AudioRecorderService recorder;
Assembly assembly = typeof(MainActivity).GetTypeInfo().Assembly;
Stream authStream = global::Android.App.Application.Context.Assets.Open("authenticateGoogle.json");
string resultString = null;
GoogleCredential credential;
Channel channel;
SpeechClient speech;
SpeechClient.StreamingRecognizeStream streamingCall;
private void Log(string logText)
{
//logEditor.Text += String.Format("{0}: {1}\n", DateTime.Now.ToString("HH:mm:ss.fff"), logText);
Console.WriteLine(String.Format("{0}: {1}\n", DateTime.Now.ToString("HH:mm:ss.fff"), logText));
}
public async Task<string> Listen()
{
PrepareSpeechRecognition();
WriteInitialPackage();
string result = await StreamingMicRecognizeAsync();
return result;
}
public void PrepareSpeechRecognition(Editor editor = null)
{
if (editor != null)
{
logEditor = editor;
}
recorder = new AudioRecorderService
{
StopRecordingOnSilence = true, //will stop recording after max seconds of silence (defined below)
AudioSilenceTimeout = TimeSpan.FromSeconds(0.5), //will stop recording after x seconds of silence, default is 2
//SilenceThreshold = 1.0F, //indicates the threshold that determines silence. Between 0 and 1, default is .15
StopRecordingAfterTimeout = true, //stop recording after a max timeout (defined below)
//TotalAudioTimeout = TimeSpan.FromSeconds(60) //audio will stop recording after x seconds, default is 30
};
// required to protect against a System.ArgumentNullException on Xamarin.Android during initialization of type
// Microsoft.Extensions.PlatformAbstractions.ApplicationEnvironment in the dependency from Google.Api.Gax (see https://github.com/aspnet/PlatformAbstractions/blob/rel/1.1.0/src/Microsoft.Extensions.PlatformAbstractions/ApplicationEnvironment.cs).
// The field initializer for "ApplicationBasePath" calls GetApplicationBasePath() which calls Path.GetFullPath(basePath)
// where basePath is AppContext.BaseDirectory which returns null on Xamarin by default (see https://github.com/mono/mono/blob/mono-5.10.0.140/mcs/class/referencesource/mscorlib/system/AppContext/AppContext.cs)
AppDomain.CurrentDomain.SetData("APP_CONTEXT_BASE_DIRECTORY", Path.DirectorySeparatorChar.ToString());
using (var reader = new StreamReader(authStream))
{
resultString = reader.ReadToEnd();
}
credential = GoogleCredential.FromJson(resultString);
if (credential.IsCreateScopedRequired)
{
credential = credential.CreateScoped(new[] { "https://www.googleapis.com/auth/cloud-platform" });
}
channel = new Channel(SpeechClient.DefaultEndpoint.Host,
credential.ToChannelCredentials());
speech = SpeechClient.Create(channel, new SpeechSettings());
streamingCall = speech.StreamingRecognize();
}
public async void WriteInitialPackage()
{
streamingCall = speech.StreamingRecognize();
// Write the initial request with the config.
await streamingCall.WriteAsync(
new StreamingRecognizeRequest()
{
StreamingConfig = new StreamingRecognitionConfig()
{
Config = new RecognitionConfig()
{
Encoding = RecognitionConfig.Types.AudioEncoding.Flac,
SampleRateHertz = 16000,
LanguageCode = "en",
},
InterimResults = true,
SingleUtterance = true,
}
}
);
}
public async Task<string> StreamingMicRecognizeAsync()
{
string callResult = "";
// Print responses as they arrive.
Task printResponses = Task.Run(async () =>
{
while (await streamingCall.ResponseStream.MoveNext(
default(CancellationToken)))
{
foreach (var result in streamingCall.ResponseStream
.Current.Results)
{
foreach (var alternative in result.Alternatives)
{
if (result.IsFinal)
{
Console.WriteLine("FINAL: ");
Console.WriteLine(alternative.Transcript);
callResult = alternative.Transcript;
}
Console.WriteLine(alternative.Transcript);
}
}
}
});
byte[] buffer = new byte[32 * 1024];
var audioRecordTask = await recorder.StartRecording();
int bytesRead = 0;
Log("STARTED RECORDING");
using (var _stream = recorder.GetAudioFileStream())
{
Log("Using");
while (recorder.IsRecording)
{
Log("Read bytes");
// [THIS IS WHERE IT ERRORS OUT]
try
{
bytesRead = await _stream.ReadAsync(buffer, 0, buffer.Length);
}
catch(Exception e)
{
Log("Exception: " + e.Message);
}
// [/THIS IS WHERE IT ERRORS OUT]
Log("Write");
streamingCall.WriteAsync(
new StreamingRecognizeRequest()
{
AudioContent = Google.Protobuf.ByteString.CopyFrom(buffer, 0, bytesRead)
}
).Wait();
Log("Delay?");
//await Task.Delay(500);
};
Log("FINISHED RECORDING");
await streamingCall.WriteCompleteAsync();
}
Log("STOP _STREAM");
await streamingCall.WriteCompleteAsync();
await printResponses;
return callResult;
}
}
}
12-08 09:45:23.721 I/mono-stdout(19951): 09:45:23.605: STARTED RECORDING 12-08 09:45:23.728 I/mono-stdout(19951): 09:45:23.605: STARTED RECORDING
09:45:23.734: Using 12-08 09:45:23.734 I/mono-stdout(19951): 09:45:23.734: Using 12-08 09:45:23.734 I/mono-stdout(19951): 09:45:23.738: Read bytes
12-08 09:45:23.738 I/mono-stdout(19951): 09:45:23.738: Read bytes 12-08 09:45:23.739 I/mono-stdout(19951): 12-08 09:45:23.795 I/Choreographer(19951): Skipped 43 frames! The application may be doing too much work on its main thread. Unhandled Exception:
System.NullReferenceException: Object reference not set to an instance of an object.
Sorry for the messy code and all, it's a learning project which had many iterations.
Google Authentication works, we did check that with the sample project.
Edit: On request, the full log: https://pastebin.com/q3G62ByT