How to convert speech to text? - C#

I am trying to develop the following functionality:
The first task is to convert text to speech - done.
The second task is to convert speech to text - this is where I am stuck.
The third task is to integrate both into an existing chat board that already has an AI chat.
I am using the following code to get text from voice/speech. I am getting a result, but it is not what I want. Please check the code snippet below.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Speech.Recognition;
using System.Speech.Synthesis;
namespace StartingWithSpeechRecognition
{
class Program
{
static SpeechRecognitionEngine _recognizer = null;
static ManualResetEvent manualResetEvent = null;
static void Main(string[] args)
{
manualResetEvent = new ManualResetEvent(false);
Console.WriteLine("To recognize speech, and write 'test' to the console, press 0");
Console.WriteLine("To recognize speech and make sure the computer speaks to you, press 1");
Console.WriteLine("To emulate speech recognition, press 2");
Console.WriteLine("To recognize speech using Choices and GrammarBuilder.Append, press 3");
Console.WriteLine("To recognize speech using a DictationGrammar, press 4");
Console.WriteLine("To get a prompt building example, press 5");
ConsoleKeyInfo pressedKey = Console.ReadKey(true);
char keychar = pressedKey.KeyChar;
Console.WriteLine("You pressed '{0}'", keychar);
switch (keychar)
{
case '0':
RecognizeSpeechAndWriteToConsole();
break;
case '1':
RecognizeSpeechAndMakeSureTheComputerSpeaksToYou();
break;
case '2':
EmulateRecognize();
break;
case '3':
SpeechRecognitionWithChoices();
break;
case '4':
SpeechRecognitionWithDictationGrammar();
break;
case '5':
PromptBuilding();
break;
default:
Console.WriteLine("You didn't press 0, 1, 2, 3, 4, or 5!");
Console.WriteLine("Press any key to continue . . .");
Console.ReadKey(true);
Environment.Exit(0);
break;
}
if (keychar != '5')
{
manualResetEvent.WaitOne();
}
if (_recognizer != null)
{
_recognizer.Dispose();
}
Console.WriteLine("Press any key to continue . . .");
Console.ReadKey(true);
}
#region Recognize speech and write to console
static void RecognizeSpeechAndWriteToConsole()
{
_recognizer = new SpeechRecognitionEngine();
_recognizer.LoadGrammar(new Grammar(new GrammarBuilder("test"))); // load a "test" grammar
_recognizer.LoadGrammar(new Grammar(new GrammarBuilder("exit"))); // load a "exit" grammar
_recognizer.SpeechRecognized += _recognizeSpeechAndWriteToConsole_SpeechRecognized; // if speech is recognized, call the specified method
_recognizer.SpeechRecognitionRejected += _recognizeSpeechAndWriteToConsole_SpeechRecognitionRejected; // if recognized speech is rejected, call the specified method
_recognizer.SetInputToDefaultAudioDevice(); // set the input to the default audio device
_recognizer.RecognizeAsync(RecognizeMode.Multiple); // recognize speech asynchronously
}
static void _recognizeSpeechAndWriteToConsole_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
if (e.Result.Text == "test")
{
Console.WriteLine("test");
}
else if (e.Result.Text == "exit")
{
manualResetEvent.Set();
}
}
static void _recognizeSpeechAndWriteToConsole_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
{
Console.WriteLine("Speech rejected. Did you mean:");
foreach (RecognizedPhrase r in e.Result.Alternates)
{
Console.WriteLine(" " + r.Text);
}
}
#endregion
#region Recognize speech and make sure the computer speaks to you (text to speech)
static void RecognizeSpeechAndMakeSureTheComputerSpeaksToYou()
{
_recognizer = new SpeechRecognitionEngine();
_recognizer.LoadGrammar(new Grammar(new GrammarBuilder("hello computer"))); // load a "hello computer" grammar
_recognizer.SpeechRecognized += _recognizeSpeechAndMakeSureTheComputerSpeaksToYou_SpeechRecognized; // if speech is recognized, call the specified method
_recognizer.SpeechRecognitionRejected += _recognizeSpeechAndMakeSureTheComputerSpeaksToYou_SpeechRecognitionRejected;
_recognizer.SetInputToDefaultAudioDevice(); // set the input to the default audio device
_recognizer.RecognizeAsync(RecognizeMode.Multiple); // recognize speech asynchronously
}
static void _recognizeSpeechAndMakeSureTheComputerSpeaksToYou_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
if (e.Result.Text == "hello computer")
{
SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();
speechSynthesizer.Speak("hello user");
speechSynthesizer.Dispose();
}
manualResetEvent.Set();
}
static void _recognizeSpeechAndMakeSureTheComputerSpeaksToYou_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
{
if (e.Result.Alternates.Count == 0)
{
Console.WriteLine("No candidate phrases found.");
return;
}
Console.WriteLine("Speech rejected. Did you mean:");
foreach (RecognizedPhrase r in e.Result.Alternates)
{
Console.WriteLine(" " + r.Text);
}
}
#endregion
#region Emulate speech recognition
static void EmulateRecognize()
{
_recognizer = new SpeechRecognitionEngine();
_recognizer.LoadGrammar(new Grammar(new GrammarBuilder("emulate speech"))); // load "emulate speech" grammar
_recognizer.SpeechRecognized += _emulateRecognize_SpeechRecognized;
_recognizer.EmulateRecognize("emulate speech");
}
static void _emulateRecognize_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
if (e.Result.Text == "emulate speech")
{
Console.WriteLine("Speech was emulated!");
}
manualResetEvent.Set();
}
#endregion
#region Speech recognition with Choices and GrammarBuilder.Append
static void SpeechRecognitionWithChoices()
{
_recognizer = new SpeechRecognitionEngine();
GrammarBuilder grammarBuilder = new GrammarBuilder();
grammarBuilder.Append("I"); // add "I"
grammarBuilder.Append(new Choices("like", "dislike")); // load "like" & "dislike"
grammarBuilder.Append(new Choices("dogs", "cats", "birds", "snakes", "fishes", "tigers", "lions", "snails", "elephants")); // add animals
_recognizer.LoadGrammar(new Grammar(grammarBuilder)); // load grammar
_recognizer.SpeechRecognized += speechRecognitionWithChoices_SpeechRecognized;
_recognizer.SetInputToDefaultAudioDevice(); // set input to default audio device
_recognizer.RecognizeAsync(RecognizeMode.Multiple); // recognize speech
}
static void speechRecognitionWithChoices_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
Console.WriteLine("Do you really " + e.Result.Words[1].Text + " " + e.Result.Words[2].Text + "?");
manualResetEvent.Set();
}
#endregion
#region Speech recognition with DictationGrammar
static void SpeechRecognitionWithDictationGrammar()
{
_recognizer = new SpeechRecognitionEngine();
_recognizer.LoadGrammar(new Grammar(new GrammarBuilder("exit")));
_recognizer.LoadGrammar(new DictationGrammar());
_recognizer.SpeechRecognized += speechRecognitionWithDictationGrammar_SpeechRecognized;
_recognizer.SetInputToDefaultAudioDevice();
_recognizer.RecognizeAsync(RecognizeMode.Multiple);
}
static void speechRecognitionWithDictationGrammar_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
if (e.Result.Text == "exit")
{
manualResetEvent.Set();
return;
}
Console.WriteLine("You said: " + e.Result.Text);
}
#endregion
#region Prompt building
static void PromptBuilding()
{
PromptBuilder builder = new PromptBuilder();
builder.StartSentence();
builder.AppendText("This is a prompt building example.");
builder.EndSentence();
builder.StartSentence();
builder.AppendText("Now, there will be a break of 2 seconds.");
builder.EndSentence();
builder.AppendBreak(new TimeSpan(0, 0, 2));
builder.StartStyle(new PromptStyle(PromptVolume.ExtraSoft));
builder.AppendText("This text is spoken extra soft.");
builder.EndStyle();
builder.StartStyle(new PromptStyle(PromptRate.Fast));
builder.AppendText("This text is spoken fast.");
builder.EndStyle();
SpeechSynthesizer synthesizer = new SpeechSynthesizer();
synthesizer.Speak(builder);
synthesizer.Dispose();
}
#endregion
}
}
If this is the wrong approach, please suggest the right way; any reference link or tutorial would be highly appreciated.

System.Speech.Recognition is an old API.
I think you should use the Google Speech API: https://cloud.google.com/speech/docs/basics or the MS Bing Speech API: https://azure.microsoft.com/en-us/services/cognitive-services/speech/
I prefer the Google API. Here is a very small example:
using Google.Apis.Auth.OAuth2;
using Google.Cloud.Speech.V1;
using Grpc.Auth;
using System;
var speech = SpeechClient.Create( channel ); // 'channel' is an authenticated Grpc.Core.Channel (see the sketch below)
var response = speech.Recognize( new RecognitionConfig()
{
Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
SampleRateHertz = 16000,
LanguageCode = "hu",
}, RecognitionAudio.FromFile( "888.wav" ) );
foreach ( var result in response.Results )
{
foreach ( var alternative in result.Alternatives )
{
Console.WriteLine( alternative.Transcript );
}
}
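Note that channel is not defined in the snippet above; with the gRPC-based Google.Cloud.Speech.V1 client of that generation it is typically built from your Google credentials, roughly like this (an untested sketch based on the standard client defaults; adjust to your own auth setup):

// Sketch: build an authenticated gRPC channel to pass to SpeechClient.Create(channel).
// Assumes GOOGLE_APPLICATION_CREDENTIALS points at a service-account key file.
GoogleCredential credential = GoogleCredential.GetApplicationDefault()
    .CreateScoped(SpeechClient.DefaultScopes);
var channel = new Grpc.Core.Channel(
    SpeechClient.DefaultEndpoint.ToString(),
    credential.ToChannelCredentials());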
But you can find more samples:
https://cloud.google.com/speech/docs/samples
Regards

Related

Detecting specific phrase to start recognition for command

I am currently messing around with System.Speech to make an Amazon Alexa-type program where you ask it a command and it speaks back an answer.
Right now I have it set up so that I click a button and it starts detecting audio input from the microphone, but I want to do it the Alexa or Google style ("Alexa" or "Hey Google").
Here is what I have so far:
public partial class SpeechRecognition : Form
{
SpeechRecognitionEngine recEngine = new SpeechRecognitionEngine();
SpeechSynthesizer synth = new SpeechSynthesizer();
public SpeechRecognition()
{
InitializeComponent();
}
private void SpeechRecognition_Load(object sender, EventArgs e)
{
Choices command = new Choices();
command.Add(new string[] { "Say Hello", "Print my name", "speak selected text", "time", "current weather", "humidity"});
GrammarBuilder gBuilder = new GrammarBuilder();
gBuilder.Append(command);
Grammar grammar = new Grammar(gBuilder);
recEngine.LoadGrammarAsync(grammar);
recEngine.SetInputToDefaultAudioDevice();
recEngine.SpeechRecognized += recEngine_SpeechRecognized;
}
void recEngine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
synth.SelectVoiceByHints(VoiceGender.Neutral);
var request = new ForecastIORequest("...", 38.47382f, -76.50636f, DateTime.Now, Unit.us);
var response = request.Get();
switch (e.Result.Text)
{
case "Say Hello":
PromptBuilder builder = new PromptBuilder();
builder.StartSentence();
builder.AppendText("Hello Zack");
builder.EndSentence();
builder.AppendBreak(new TimeSpan(0, 0, 0, 0,50));
builder.StartSentence();
builder.AppendText("How are you?");
builder.EndSentence();
synth.SpeakAsync(builder);
break;
case "Print my name":
richTextBox1.Text += "\nZack";
break;
case "speak selected text":
synth.SpeakAsync(richTextBox1.SelectedText);
break;
case "time":
DateTime time = DateTime.Now;
synth.SpeakAsync(time.ToShortTimeString());
break;
case "current weather":
response = request.Get();
synth.SpeakAsync("The weather in St. Leonard is " + response.currently.summary + "with a temperature of " + Math.Round((decimal)response.currently.temperature, 0) + " degrees farenheit");
break;
case "humidity":
response = request.Get();
synth.SpeakAsync("Current humidity is " + 100 * response.currently.humidity + "percent");
break;
default:
synth.SpeakAsync("I did not recognize a command");
break;
}
}
private void enableBtn_Click(object sender, EventArgs e)
{
recEngine.RecognizeAsync(RecognizeMode.Multiple);
disableBtn.Enabled = true;
}
private void disableBtn_Click(object sender, EventArgs e)
{
recEngine.RecognizeAsyncStop();
disableBtn.Enabled = false;
}
}
How would I alter this so that it constantly listens for that one phrase and then listens for a command afterwards? I'm guessing I need to create another instance of the recognition engine? Any help would be great.
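One common approach (a rough, untested sketch; the wake phrase "hey computer" and the HandleCommand helper are invented for illustration) is to add the wake phrase to the existing grammar and gate the command handling behind a flag:

// Sketch: single recognizer with wake-word gating.
bool awake = false;   // class-level field

void recEngine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    string text = e.Result.Text;
    if (!awake)
    {
        if (text == "hey computer")      // assumed wake phrase; it must also be added to the Choices
        {
            awake = true;
            synth.SpeakAsync("Yes?");
        }
        return;                          // ignore commands until the wake phrase is heard
    }
    HandleCommand(text);                 // hypothetical helper: the existing switch statement moved here
    awake = false;                       // go back to listening for the wake phrase
}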

C# Voice Command: Wait for input

I have a Windows Form app that recognizes voice commands and then performs the action accordingly. However, I can't figure out how to speak one command after the other.
Code:
if (e.Result.Text == "initiate power saving mode")
{
Taskbar taskbar = new Taskbar();
taskbar.Show();
SoundPlayer deacr = new SoundPlayer(Properties.Resources.deacr);
deacr.PlaySync();
if (e.Result.Text== "confirm")
{
SoundPlayer deacd = new SoundPlayer(Properties.Resources.deacd);
deacd.PlaySync();
Application.SetSuspendState(PowerState.Suspend, true, true);
}
else if (e.Result.Text == "cancel")
{
SoundPlayer cancelled = new SoundPlayer(Properties.Resources.cancelled);
cancelled.PlaySync();
}
}
Am I missing something, or just doing something wrong?
You need to use System.Speech. This is how I do it on my system. You can do the following:
using System.IO;
using System.Speech.Synthesis;
using System.Speech.Recognition;
namespace Alexis
{
public partial class frmMain : Form
{
SpeechRecognitionEngine _recognizer = new SpeechRecognitionEngine();
SpeechSynthesizer Alexis = new SpeechSynthesizer();
SpeechRecognitionEngine startlistening = new SpeechRecognitionEngine();
}
// ...
}
then in the main form
private void frmMain_Load(object sender, EventArgs e)
{
_recognizer.SetInputToDefaultAudioDevice();
_recognizer.LoadGrammarAsync(new Grammar(new GrammarBuilder(new Choices(File.ReadAllLines(@"Default Commands.txt")))));
_recognizer.SpeechDetected += new EventHandler<SpeechDetectedEventArgs>(_recognizer_SpeechDetected);
_recognizer.RecognizeAsync(RecognizeMode.Multiple);
startlistening.SetInputToDefaultAudioDevice();
startlistening.LoadGrammarAsync(new Grammar(new GrammarBuilder(new Choices("alexis"))));
startlistening.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(startlistening_SpeechRecognized);
}
Then do your coding. Now, in order to call the commands, you need to create a txt document and put the commands in it one line at a time (do not leave any blank lines).
_recognizer.LoadGrammarAsync(new Grammar(new GrammarBuilder(new Choices(File.ReadAllLines(@"Default Commands.txt")))));
will load the commands so that you can call them. Also, I would not use if (e.Result.Text == "initiate power saving mode"); I would write it as if (speech == "initiate power saving mode"), where speech is a string set to e.Result.Text.
So if you wanted to continue, you could do this:
if (speech == "initiate power saving mode")
{
Taskbar taskbar = new Taskbar();
taskbar.Show();
SoundPlayer deacr = new SoundPlayer(Properties.Resources.deacr);
deacr.PlaySync();
}
else if (speech== "confirm")
{
SoundPlayer deacd = new SoundPlayer(Properties.Resources.deacd);
deacd.PlaySync();
Application.SetSuspendState(PowerState.Suspend, true, true);
}
else if (speech == "cancel")
{
SoundPlayer cancelled = new SoundPlayer(Properties.Resources.cancelled);
cancelled.PlaySync();
}
Be sure to put the commands "initiate power saving mode", "confirm", and "cancel" in the commands txt document (they are case sensitive).
Then, if you want to keep releases down, you can create a tabbed form and add your own custom commands. Hope this helps, but remember this is an example for you to go by.
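As a rough, untested sketch of that flow (the awaitingConfirmation field is my own name for illustration), you can also track whether a confirmation is pending between recognitions:

// Sketch: remember that a confirmation is pending.
bool awaitingConfirmation = false;   // class-level field

if (speech == "initiate power saving mode")
{
    new SoundPlayer(Properties.Resources.deacr).PlaySync();
    awaitingConfirmation = true;     // the next "confirm"/"cancel" applies to this request
}
else if (speech == "confirm" && awaitingConfirmation)
{
    awaitingConfirmation = false;
    new SoundPlayer(Properties.Resources.deacd).PlaySync();
    Application.SetSuspendState(PowerState.Suspend, true, true);
}
else if (speech == "cancel" && awaitingConfirmation)
{
    awaitingConfirmation = false;
    new SoundPlayer(Properties.Resources.cancelled).PlaySync();
}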

c# - Record audio on keydown

The problem is simple: I want to record audio using the microphone when and only when a key is held down. When the key is lifted I want it to stop recording. Then pass this recording to the speech recognition.
The issue is that when I press and hold the Z button, it goes into the gkh_KeyUp event and keeps running that code. I want to simply record while the key is held, not run the record function a bunch of times.
I want to use this in conjunction with Microsoft's speech recognition.
using System.Speech.Recognition;
public partial class Form1 : Form
{
SpeechRecognitionEngine recognizer = new SpeechRecognitionEngine();
Grammar dictationGrammar = new DictationGrammar();
public Form1()
{
InitializeComponent();
globalKeyboardHook gkh = new globalKeyboardHook();
gkh.HookedKeys.Add(Keys.Z);
//gkh.HookedKeys.Add(Keys.B);
gkh.KeyDown += new KeyEventHandler(gkh_KeyDown);
gkh.KeyUp += new KeyEventHandler(gkh_KeyUp);
recognizer = new SpeechRecognitionEngine();
dictationGrammar = new DictationGrammar();
recognizer.LoadGrammar(dictationGrammar);
}
void gkh_KeyUp(object sender, KeyEventArgs e)
{
textBox1.Text = "Up" + e.KeyCode.ToString();
try
{
RecognitionResult result = recognizer.Recognize();
textBox1.Text = result.Text;
}
catch (InvalidOperationException exception)
{
textBox1.Text = String.Format("Could not recognize input from default aduio device. Is a microphone or sound card available?\r\n{0} - {1}.", exception.Source, exception.Message);
}
e.Handled = true;
}
void gkh_KeyDown(object sender, KeyEventArgs e)
{
textBox1.Text += "Down" + e.KeyCode.ToString();
try
{
//recognizer.SetInputToDefaultAudioDevice();
recognizer.SetInputToAudioStream()
}
catch (InvalidOperationException exception)
{
button1.Text = String.Format("Could not recognize input from default aduio device. Is a microphone or sound card available?\r\n{0} - {1}.", exception.Source, exception.Message);
}
e.Handled = true;
}
private void button1_Click(object sender, EventArgs e)
{
SpeechRecognitionEngine recognizer = new SpeechRecognitionEngine();
Grammar dictationGrammar = new DictationGrammar();
recognizer.LoadGrammar(dictationGrammar);
try
{
button1.Text = "Speak Now";
recognizer.SetInputToDefaultAudioDevice();
//recognizer.SetInputToAudioStream;
RecognitionResult result = recognizer.Recognize();
textBox1.Text = result.Text;
}
catch (InvalidOperationException exception)
{
button1.Text = String.Format("Could not recognize input from default aduio device. Is a microphone or sound card available?\r\n{0} - {1}.", exception.Source, exception.Message);
}
finally
{
recognizer.UnloadAllGrammars();
}
}
private void textBox1_TextChanged(object sender, EventArgs e)
{
}
}
Keyboard hook: http://www.codeproject.com/Articles/19004/A-Simple-C-Global-Low-Level-Keyboard-Hook
There is no difference between real, very frequent keypresses and the auto-repeated keypresses generated while you hold the button down.
You would need to use a timeout: if no further key-down arrives within, say, 0.5 seconds of the previous one, you stop the recording.
You can also access the hold state of the key if you still want to use a hook, but you need slightly lower-level access: you can inspect the WM_KEYDOWN message and look at bit 30 of the key state:
How to disable WM_KEYDOWN repeat event for Shift/Ctrl/Alt key?
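A rough, untested sketch of the timeout idea (the StartRecording/StopRecordingAndRecognize helpers and the 500 ms interval are assumptions for illustration): restart a timer on every KeyDown so that, once the auto-repeat stream stops, the timer tick tells you the key was released:

// Sketch: class-level fields
bool isRecording = false;
System.Windows.Forms.Timer releaseTimer = new System.Windows.Forms.Timer { Interval = 500 };

void gkh_KeyDown(object sender, KeyEventArgs e)
{
    releaseTimer.Stop();
    releaseTimer.Start();               // reset the 0.5 s window on every (possibly auto-repeated) KeyDown
    if (!isRecording)
    {
        isRecording = true;
        StartRecording();               // hypothetical helper: begin capturing microphone audio
    }
    e.Handled = true;
}

// Wire up once, e.g. in the constructor:
// releaseTimer.Tick += (s, a) => { releaseTimer.Stop(); isRecording = false; StopRecordingAndRecognize(); };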

Speech Recognition windows 10

I'm trying to build speech recognition in Windows 10 (using Cortana) in Visual C#.
This is part of my code for speech recognition using the old System.Speech.Recognition; it works great, but it only supports English.
SpeechSynthesizer sSynth = new SpeechSynthesizer();
PromptBuilder pBuilder = new PromptBuilder();
SpeechRecognitionEngine sRecognize = new SpeechRecognitionEngine();
Choices sList = new Choices();
private void Form1_Load(object sender, EventArgs e)
{
}
private void button1_Click(object sender, EventArgs e)
{
pBuilder.ClearContent();
pBuilder.AppendText(textBox2.Text);
sSynth.Speak(pBuilder);
}
private void button2_Click(object sender, EventArgs e)
{
button2.Enabled = false;
button3.Enabled = true;
sList.Add(new string[] { "who are you", "play a song" });
Grammar gr = new Grammar(new GrammarBuilder(sList));
try
{
sRecognize.RequestRecognizerUpdate();
sRecognize.LoadGrammar(gr);
sRecognize.SpeechRecognized += sRecognize_SpeechRecognized;
sRecognize.SetInputToDefaultAudioDevice();
sRecognize.RecognizeAsync(RecognizeMode.Multiple);
}
catch (Exception ex)
{
MessageBox.Show(ex.Message, "Error");
}
}
private void sRecognize_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
textBox1.Text = textBox1.Text + " " + e.Result.Text.ToString() + "\r\n";
}
How can I do it using the new speech recognition in Windows 10?
Use the Microsoft Speech Platform SDK v11.0 (Microsoft.Speech.Recognition).
It works like System.Speech, but you can use the Italian language (separate install) and also use an SRGS grammar. I work with both the Kinect (SetInputToAudioStream) and the default input device (SetInputToDefaultAudioDevice) without hassle.
It also works offline, so there is no need to be online as with Cortana.
With an SRGS grammar you can get a decent level of complexity for your commands.
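For reference, loading an SRGS grammar with either engine just means pointing the Grammar constructor at a .grxml file (a minimal sketch; commands.grxml is a made-up file name):

// Sketch: load an SRGS XML grammar file (hypothetical path).
Grammar srgsGrammar = new Grammar(@"commands.grxml");
recognizer.LoadGrammar(srgsGrammar);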
UPDATE
Here is how I initialize the recognizer
private RecognizerInfo GetRecognizer(string culture, string recognizerId)
{
try
{
foreach (var recognizer in SpeechRecognitionEngine.InstalledRecognizers())
{
if (!culture.Equals(recognizer.Culture.Name, StringComparison.OrdinalIgnoreCase)) continue;
if (!string.IsNullOrWhiteSpace(recognizerId))
{
string value;
recognizer.AdditionalInfo.TryGetValue(recognizerId, out value);
if ("true".Equals(value, StringComparison.OrdinalIgnoreCase))
return recognizer;
}
else
return recognizer;
}
}
catch (Exception e)
{
log.Error(m => m("Recognizer not found"), e);
}
return null;
}
private void InitializeSpeechRecognizer(string culture, string recognizerId, Func<Stream> audioStream)
{
log.Debug(x => x("Initializing SpeechRecognizer..."));
try
{
var recognizerInfo = GetRecognizer(culture, recognizerId);
if (recognizerInfo != null)
{
recognizer = new SpeechRecognitionEngine(recognizerInfo.Id);
//recognizer.LoadGrammar(VoiceCommands.GetCommandsGrammar(recognizerInfo.Culture));
recognizer.LoadGrammar(grammar);
recognizer.SpeechRecognized += SpeechRecognized;
recognizer.SpeechRecognitionRejected += SpeechRejected;
if (audioStream == null)
{
log.Debug(x => x("...input on DefaultAudioDevice"));
recognizer.SetInputToDefaultAudioDevice();
}
else
{
log.Debug(x => x("SpeechRecognizer input on CustomAudioStream"));
recognizer.SetInputToAudioStream(audioStream(), new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));
}
}
else
{
log.Error(x => x(Properties.Resources.SpeechRecognizerNotFound, recognizerId));
throw new Exception(string.Format(Properties.Resources.SpeechRecognizerNotFound, recognizerId));
}
log.Debug(x => x("...complete"));
}
catch (Exception e)
{
log.Error(m => m("Error while initializing SpeechEngine"), e);
throw;
}
}
A Cortana API usage example is here. You can copy it and start modifying it according to your needs. It creates a dialog with the user. You cannot exactly reproduce your System.Speech code with the Cortana API because it is designed for another purpose. If you still want to recognize just a few words, you can continue using the System.Speech API.
The System.Speech API supports other languages, not just English. You can find more information here:
Change the language of Speech Recognition Engine library
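For example, with the matching recognizer/language pack installed, System.Speech will give you a recognizer for a specific culture directly (a small sketch; it-IT is just an example culture):

// Sketch: create a recognizer for an installed non-English culture.
using System.Globalization;

var italianRecognizer = new SpeechRecognitionEngine(new CultureInfo("it-IT"));
italianRecognizer.SetInputToDefaultAudioDevice();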

Erratic behaviour of voice synthesis in WinRT

I have a universal app that uses voice synthesis. Running under WP8.1, it works fine, but as soon as I try Win8.1 I start getting strange behaviour. The actual voice seems to speak once; however, on the second run (within the same app), the following code hangs:
string toSay = "hello";
System.Diagnostics.Debug.WriteLine("{0}: Speak {1}", DateTime.Now, toSay);
using (SpeechSynthesizer synth = new SpeechSynthesizer())
{
System.Diagnostics.Debug.WriteLine("{0}: After sythesizer instantiated", DateTime.Now);
var voiceStream = await synth.SynthesizeTextToStreamAsync(toSay);
System.Diagnostics.Debug.WriteLine("{0}: After voice stream", DateTime.Now);
The reason for the debug statements is that the code seems to have an uncertainty principle to it. That is, when I debug through it, the code executes and gets past the SynthesizeTextToStreamAsync statement. However, when the breakpoints are removed, I only get the debug statement preceding it - never the one after.
The best I can deduce is that during the first run-through something bad happens (it does claim to complete, and it actually speaks the first time), then it gets stuck and can't play any more. The full code looks similar to this:
string toSay = "hello";
System.Diagnostics.Debug.WriteLine("{0}: Speak {1}", DateTime.Now, toSay);
using (SpeechSynthesizer synth = new SpeechSynthesizer())
{
System.Diagnostics.Debug.WriteLine("{0}: After sythesizer instantiated", DateTime.Now);
var voiceStream = await synth.SynthesizeTextToStreamAsync(toSay);
System.Diagnostics.Debug.WriteLine("{0}: After voice stream", DateTime.Now);
MediaElement mediaElement;
mediaElement = rootControl.Children.FirstOrDefault(a => a as MediaElement != null) as MediaElement;
if (mediaElement == null)
{
mediaElement = new MediaElement();
rootControl.Children.Add(mediaElement);
}
mediaElement.SetSource(voiceStream, voiceStream.ContentType);
mediaElement.Volume = 1;
mediaElement.IsMuted = false;
var tcs = new TaskCompletionSource<bool>();
mediaElement.MediaEnded += (o, e) => { tcs.TrySetResult(true); };
mediaElement.MediaFailed += (o, e) => { tcs.TrySetResult(true); };
mediaElement.Play();
await tcs.Task;
Okay - I think I managed to get this working... although I'm unsure why.
using (SpeechSynthesizer synth = new SpeechSynthesizer())
{
var voiceStream = await synth.SynthesizeTextToStreamAsync(toSay);
MediaElement mediaElement;
mediaElement = rootControl.Children.FirstOrDefault(a => a as MediaElement != null) as MediaElement;
if (mediaElement == null)
{
mediaElement = new MediaElement();
rootControl.Children.Add(mediaElement);
}
mediaElement.SetSource(voiceStream, voiceStream.ContentType);
mediaElement.Volume = 1;
mediaElement.IsMuted = false;
var tcs = new TaskCompletionSource<bool>();
mediaElement.MediaEnded += (o, e) => { tcs.TrySetResult(true); };
mediaElement.Play();
await tcs.Task;
// Removing the control seems to free up whatever is locking
rootControl.Children.Remove(mediaElement);
}
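One small robustness tweak worth considering (an assumption on my part, not part of the original fix): also complete the TaskCompletionSource when playback fails, as the first snippet did, so a MediaFailed error cannot leave the await hanging:

// Assumed addition: mirror the MediaEnded handler so a playback error also releases the await.
mediaElement.MediaFailed += (o, e) => { tcs.TrySetResult(true); };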
I am not sure what programming language you are using, but this may help. This is in C#, so it could help lead you in the right direction.
namespace Alexis
{
public partial class frmMain : Form
{
SpeechRecognitionEngine _recognizer = new SpeechRecognitionEngine();
SpeechSynthesizer Alexis = new SpeechSynthesizer();
SpeechRecognitionEngine startlistening = new SpeechRecognitionEngine();
DateTime timenow = DateTime.Now;
}
//other coding such as InitializeComponent and others.
//
//
//
//
private void frmMain_Load(object sender, EventArgs e)
{
_recognizer.SetInputToDefaultAudioDevice();
_recognizer.LoadGrammarAsync(new Grammar(new GrammarBuilder(new Choices(File.ReadAllLines(@"Default Commands.txt")))));
_recognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(Shell_SpeechRecognized);
_recognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(Social_SpeechRecognized);
_recognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(Web_SpeechRecognized);
_recognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(Default_SpeechRecognized);
_recognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(AlarmClock_SpeechRecognized);
_recognizer.LoadGrammarAsync(new Grammar(new GrammarBuilder(new Choices(AlarmAM))));
_recognizer.LoadGrammarAsync(new Grammar(new GrammarBuilder(new Choices(AlarmPM))));
_recognizer.SpeechDetected += new EventHandler<SpeechDetectedEventArgs>(_recognizer_SpeechDetected);
_recognizer.RecognizeAsync(RecognizeMode.Multiple);
startlistening.SetInputToDefaultAudioDevice();
startlistening.LoadGrammarAsync(new Grammar(new GrammarBuilder(new Choices("alexis"))));
startlistening.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(startlistening_SpeechRecognized);
//other stuff here... Then, once you have this, you can create a method with your code as follows.
}
private void Default_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
int ranNum;
string speech = e.Result.Text;
switch (speech)
{
#region Greetings
case "hello":
case "hello alexis":
timenow = DateTime.Now;
if (timenow.Hour >= 5 && timenow.Hour < 12)
{ Alexis.SpeakAsync("Goodmorning " + Settings.Default.User); }
if (timenow.Hour >= 12 && timenow.Hour < 18)
{ Alexis.SpeakAsync("Good afternoon " + Settings.Default.User); }
if (timenow.Hour >= 18 && timenow.Hour < 24)
{ Alexis.SpeakAsync("Good evening " + Settings.Default.User); }
if (timenow.Hour < 5)
{ Alexis.SpeakAsync("Hello " + Settings.Default.User + ", it's getting late"); }
break;
case "whats my name":
case "what is my name":
Alexis.SpeakAsync(Settings.Default.User);
break;
case "stop talking":
case "quit talking":
Alexis.SpeakAsyncCancelAll();
ranNum = rnd.Next(1, 3); // the upper bound is exclusive, so this returns 1 or 2
if (ranNum == 2)
{ Alexis.Speak("sorry " + Settings.Default.User); }
break;
}
}
Instead of putting the commands in the code, I recommend that you use a text document. Once you have that, you can add your own commands to it and then load it in code. Also add a reference to System.Speech.
I hope this helps get you on the right track.
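For example, a hypothetical Default Commands.txt could look like this, with one phrase per line and no blank lines (each line becomes one alternative in the Choices):

hello
hello alexis
what is my name
whats my name
stop talking
quit talking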
