Disclaimer: I am new to C# and Xamarin.Forms, so apologies if I have missed anything obvious.
I am trying to create an app that takes user input in the form of a voice command (using Speech-To-Text) and outputs an audio announcement from the application (using Text-To-Speech).
The issue is that when you start recording audio for the Speech-To-Text service, the device's audio is switched into a recording mode (I'm not sure what the technical term for this is) and playback audio drops to a very low volume (as described in this SO question, here, and here).
I'm ideally looking for a way to revert this so that once the appropriate voice command is recognised (i.e. 'Secret command') via Speech-To-Text, the user can hear the secret phrase back at full/normal volume through Text-To-Speech in a Xamarin Forms application.
I tried to produce a working example by adapting the sample code for the Azure Cognitive Speech Service. I cloned the code and adapted the XAML and C# code-behind for the MainPage slightly, as shown below, to stop the speech recognition service once a certain voice command is triggered and then have a phrase spoken via the Text-To-Speech service. My sample demonstrates the issue: if the user starts by selecting the Transcribe button and says the appropriate voice command, they should hear the secret phrase back, but when testing on a physical iOS device the playback volume is so low that I can barely hear it.
XAML
<ContentPage xmlns="http://xamarin.com/schemas/2014/forms"
xmlns:x="http://schemas.microsoft.com/winfx/2009/xaml"
xmlns:d="http://xamarin.com/schemas/2014/forms/design"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
mc:Ignorable="d"
x:Class="CognitiveSpeechService.MyPage"
Title="Speech Services Transcription"
Padding="10,35,10,10">
<!-- Page layout, top to bottom: transcript area, busy indicator, Transcribe/Stop button, Speak button.
     x:Class ties this markup to the MyPage partial class in the code-behind. -->
<StackLayout>
<!-- Transcript area: the code-behind appends each recognized phrase to the transcribedText label. -->
<Frame BorderColor="DarkGray"
CornerRadius="10"
HeightRequest="300"
WidthRequest="280"
HorizontalOptions="Center"
VerticalOptions="Start"
BackgroundColor="LightGray">
<ScrollView x:Name="scroll">
<Label x:Name="transcribedText"
Margin="10,10,10,10" />
</ScrollView>
</Frame>
<!-- Busy indicator: the code-behind sets IsRunning to match whether transcription is active. -->
<ActivityIndicator x:Name="transcribingIndicator"
HorizontalOptions="Center"
VerticalOptions="Start"
WidthRequest="300"
IsRunning="False" />
<!-- Start/stop toggle: handled by TranscribeClicked; the code-behind switches its text and colour
     between "Transcribe" (green) and "Stop" (red). -->
<Button x:Name="transcribeButton"
WidthRequest="300"
HeightRequest="50"
Text="Transcribe"
TextColor="White"
CornerRadius="10"
BackgroundColor="Green"
BorderColor="DarkGray"
BorderWidth="1"
FontAttributes="Bold"
HorizontalOptions="Center"
VerticalOptions="Start"
Clicked="TranscribeClicked"/>
<!-- Manual text-to-speech trigger: handled by SpeakBtn_Clicked, which speaks a fixed sample line. -->
<Button x:Name="SpeakBtn"
WidthRequest="300"
HeightRequest="50"
Text="Speak"
TextColor="White"
CornerRadius="10"
BackgroundColor="Red"
BorderColor="DarkGray"
BorderWidth="1"
FontAttributes="Bold"
HorizontalOptions="Center"
VerticalOptions="Start"
Clicked="SpeakBtn_Clicked"/>
</StackLayout>
</ContentPage>
Code-behind
namespace CognitiveSpeechService
{
    /// <summary>
    /// Code-behind for the transcription page. Runs continuous speech recognition,
    /// appends recognized phrases to the on-screen transcript, and answers the
    /// secret voice command with a spoken reply via text-to-speech.
    /// </summary>
    public partial class MyPage : ContentPage
    {
        // Phrase that triggers the spoken reply. Matched case-insensitively, because
        // recognized text may be capitalized and punctuated (e.g. "Secret command.").
        const string SecretCommand = "secret command";

        AudioRecorderService recorder = new AudioRecorderService();
        SpeechRecognizer recognizer;
        IMicrophoneService micService;
        bool isTranscribing = false;

        /// <summary>
        /// Builds the page and resolves the platform microphone-permission service.
        /// </summary>
        public MyPage()
        {
            InitializeComponent();
            micService = DependencyService.Resolve<IMicrophoneService>();
        }

        /// <summary>
        /// Click handler for the Transcribe/Stop button: toggles continuous recognition.
        /// </summary>
        /// <param name="sender">The button that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        async void TranscribeClicked(object sender, EventArgs e)
        {
            bool isMicEnabled = await micService.GetPermissionAsync();

            // EARLY OUT: make sure mic is accessible
            if (!isMicEnabled)
            {
                UpdateTranscription("Please grant access to the microphone!");
                return;
            }

            // Lazily create the recognizer once and reuse it for every start/stop cycle.
            if (recognizer == null)
            {
                var config = SpeechConfig.FromSubscription(Constants.CognitiveServicesApiKey, Constants.CognitiveServicesRegion);
                recognizer = new SpeechRecognizer(config);
                recognizer.Recognized += (obj, args) =>
                {
                    UpdateTranscription(args.Result.Text);
                };
            }

            if (isTranscribing)
            {
                // Await the stop so the display refresh below sees the final state.
                await StopSpeechRecognitionAsync();
            }
            else
            {
                // UpdateTranscription already marshals to the main thread, so no extra dispatch is needed.
                InsertDateTimeRecord();

                try
                {
                    await recognizer.StartContinuousRecognitionAsync();
                    // Only report "transcribing" when the recognizer really started.
                    isTranscribing = true;
                }
                catch (Exception ex)
                {
                    UpdateTranscription(ex.Message);
                }
            }

            UpdateDisplayState();
        }

        // https://stackoverflow.com/questions/56514413/volume-has-dropped-significantly-in-text-to-speech-since-adding-speech-to-text
        /// <summary>
        /// Stops continuous recognition (if a recognizer exists), clears the
        /// transcribing flag and refreshes the UI. Returns a task so callers can
        /// wait for recognition to have actually stopped before doing further work.
        /// </summary>
        /// <returns>A task that completes once the stop attempt has finished.</returns>
        private async System.Threading.Tasks.Task StopSpeechRecognitionAsync()
        {
            if (recognizer == null)
            {
                return;
            }

            try
            {
                await recognizer.StopContinuousRecognitionAsync();
                Console.WriteLine($"IsRecording: {recorder.IsRecording}");
            }
            catch (Exception ex)
            {
                UpdateTranscription(ex.Message);
            }

            // The flag is cleared even when stopping failed, so the UI never stays stuck on "Stop".
            isTranscribing = false;
            UpdateDisplayState();
        }

        /// <summary>
        /// Handles a piece of recognized (or status) text on the main thread. Text
        /// containing the secret command stops recognition and triggers the spoken
        /// reply; any other non-blank text is appended to the transcript.
        /// </summary>
        /// <param name="newText">Text to process; null or whitespace is ignored.</param>
        void UpdateTranscription(string newText)
        {
            if (string.IsNullOrWhiteSpace(newText))
            {
                return;
            }

            Device.BeginInvokeOnMainThread(async () =>
            {
                // Case-insensitive search. The previous ToLower().Contains("Secret command")
                // could never match because of the capital 'S'.
                bool isSecretCommand = newText.IndexOf(SecretCommand, StringComparison.OrdinalIgnoreCase) >= 0;
                if (!isSecretCommand)
                {
                    transcribedText.Text += $"{newText}\n";
                    return;
                }

                Console.WriteLine("heart rate voice command detected");

                // This lambda is effectively async void, so catch everything here;
                // an escaping exception would otherwise be unobserved and could crash the app.
                try
                {
                    // Wait until recognition has stopped before speaking, so the
                    // reply is not played while the microphone is still capturing.
                    await StopSpeechRecognitionAsync();

                    // NOTE(review): on iOS the shared audio session may still be in a
                    // recording category after recognition stops, which is the likely
                    // cause of quiet playback. Restoring a playback category needs
                    // platform-specific code (e.g. AVAudioSession behind a
                    // DependencyService) that lives outside this page - confirm on device.
                    var settings = new SpeechOptions()
                    {
                        Volume = 1.0f,
                    };
                    await TextToSpeech.SpeakAsync("this works!", settings);
                }
                catch (Exception ex)
                {
                    transcribedText.Text += $"{ex.Message}\n";
                }
            });
        }

        /// <summary>
        /// Appends a timestamped separator to the transcript to mark a new session.
        /// </summary>
        void InsertDateTimeRecord()
        {
            var msg = $"=================\n{DateTime.Now.ToString()}\n=================";
            UpdateTranscription(msg);
        }

        /// <summary>
        /// Syncs the Transcribe/Stop button and the activity indicator with the
        /// current transcribing state, on the main thread.
        /// </summary>
        void UpdateDisplayState()
        {
            Device.BeginInvokeOnMainThread(() =>
            {
                if (isTranscribing)
                {
                    transcribeButton.Text = "Stop";
                    transcribeButton.BackgroundColor = Color.Red;
                    transcribingIndicator.IsRunning = true;
                }
                else
                {
                    transcribeButton.Text = "Transcribe";
                    transcribeButton.BackgroundColor = Color.Green;
                    transcribingIndicator.IsRunning = false;
                }
            });
        }

        /// <summary>
        /// Click handler for the Speak button: speaks a fixed sample line.
        /// </summary>
        /// <param name="sender">The button that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        async void SpeakBtn_Clicked(object sender, EventArgs e)
        {
            await TextToSpeech.SpeakAsync("Sample audio line. Blah blah blah. ");
        }
    }
}
Thanks for your help!