IBM Watson Speech to Text Service is not giving response in Unity3d - c#

I have an ExampleSstreaming class which actually I got from GitHub of IBM Watson SDK (speech to text service demo). Here it is
public class ExampleStreaming : MonoBehaviour
{
private int m_RecordingRoutine = 0;
private string m_MicrophoneID = null;
private AudioClip m_Recording = null;
private int m_RecordingBufferSize = 5;
private int m_RecordingHZ = 22050;
private SpeechToText m_SpeechToText = new SpeechToText();
void Start()
{
LogSystem.InstallDefaultReactors();
Log.Debug("ExampleStreaming", "Start();");
Active = true;
Debug.Log("start");
StartRecording();
}
public void Update() {
Debug.Log(m_SpeechToText.IsListening);
}
public bool Active
{
get { return m_SpeechToText.IsListening; }
set
{
if (value && !m_SpeechToText.IsListening)
{
m_SpeechToText.DetectSilence = true;
m_SpeechToText.EnableWordConfidence = false;
m_SpeechToText.EnableTimestamps = false;
m_SpeechToText.SilenceThreshold = 0.03f;
m_SpeechToText.MaxAlternatives = 1;
m_SpeechToText.EnableContinousRecognition = true;
m_SpeechToText.EnableInterimResults = true;
m_SpeechToText.OnError = OnError;
m_SpeechToText.StartListening(OnRecognize);
}
else if (!value && m_SpeechToText.IsListening)
{
m_SpeechToText.StopListening();
}
}
}
private void StartRecording()
{
if (m_RecordingRoutine == 0)
{
Debug.Log("m_RecordingRoutine");
UnityObjectUtil.StartDestroyQueue();
m_RecordingRoutine = Runnable.Run(RecordingHandler());
}
}
private void StopRecording()
{
if (m_RecordingRoutine != 0)
{
Microphone.End(m_MicrophoneID);
Runnable.Stop(m_RecordingRoutine);
m_RecordingRoutine = 0;
}
}
private void OnError(string error)
{
Active = false;
Log.Debug("ExampleStreaming", "Error! {0}", error);
}
private IEnumerator RecordingHandler()
{
Log.Debug("ExampleStreaming", "devices: {0}", Microphone.devices);
m_MicrophoneID = Microphone.devices[0];
Debug.Log("m_MicrophoneID : " + m_MicrophoneID);
m_Recording = Microphone.Start(m_MicrophoneID, true, m_RecordingBufferSize, m_RecordingHZ);
yield return null; // let m_RecordingRoutine get set..
Debug.Log("m_Recording : " + m_Recording.length);
if (m_Recording == null)
{
Debug.Log("m_Recording is null");
StopRecording();
yield break;
}
bool bFirstBlock = true;
int midPoint = m_Recording.samples / 2;
float[] samples = null;
while (m_RecordingRoutine != 0 && m_Recording != null)
{
int writePos = Microphone.GetPosition(m_MicrophoneID);
if (writePos > m_Recording.samples || !Microphone.IsRecording(m_MicrophoneID))
{
Log.Error("MicrophoneWidget", "Microphone disconnected.");
StopRecording();
yield break;
}
if ((bFirstBlock && writePos >= midPoint)
|| (!bFirstBlock && writePos < midPoint))
{
// front block is recorded, make a RecordClip and pass it onto our callback.
samples = new float[midPoint];
m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);
AudioData record = new AudioData();
record.MaxLevel = Mathf.Max(samples);
record.Clip = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
record.Clip.SetData(samples, 0);
m_SpeechToText.OnListen(record);
bFirstBlock = !bFirstBlock;
}
else
{
// calculate the number of samples remaining until we ready for a block of audio,
// and wait that amount of time it will take to record.
int remaining = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
float timeRemaining = (float)remaining / (float)m_RecordingHZ;
yield return new WaitForSeconds(timeRemaining);
}
}
yield break;
}
private void OnRecognize(SpeechRecognitionEvent result)
{
Debug.Log("OnRecognize");
if (result != null && result.results.Length > 0)
{
foreach (var res in result.results)
{
foreach (var alt in res.alternatives)
{
string text = alt.transcript;
Debug.Log(text);
Log.Debug("ExampleStreaming", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
}
}
}
}
}
and this is the line i add to get microphone. I just edit it to provide Microphone Device at zero index which was actually null (I don't know why, is this intentionally left or an error), in the function RecordingHandler .
m_MicrophoneID = Microphone.devices[0];
but unfortunately it is not showing any output log in EventOnRecognize which i think that it should execute.
Wile it displaying these logs, after some seconds (as i given length 5 of the audio). What i am doing wrong, i am unable to understand that how speech to text.
[DEBUG] OnListenClosed(), State = DISCONNECTED
[DEBUG] KeepAlive exited.
I have also tried IBM Watson Speech To text Scene it is also not showing anything.

I am not able to stream real-time output yet but become able to convert audio clip into text through watson service and here is the simple code (which took three days).
using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
public class AudioClipToTextWatson : MonoBehaviour {
// Non-streaming
SpeechToText m_SpeechToText = new SpeechToText();
public AudioClip m_AudioClip = new AudioClip();
public bool on = false;
void Start () {
m_AudioClip = Microphone.Start(Microphone.devices[0], false, 4, 44100);
m_SpeechToText.Recognize(m_AudioClip, OnRecognize);
// Streaming
m_SpeechToText.StartListening(OnRecognize);
// Stop listening
m_SpeechToText.StopListening();
}
private void OnRecognize(SpeechRecognitionEvent result)
{
Debug.Log("result : " + result);
if (result != null && result.results.Length > 0)
{
foreach (var res in result.results)
{
foreach (var alt in res.alternatives)
{
string text = alt.transcript;
Debug.Log(text);
Debug.Log(res.final);
}
}
}
}
}
Note :You can record and an audio clip using your microphone and convert it to text. If you already have an audio then drop it to inspector and comment out the first line in Start Event.

I solved Error
I encounter the same issue with Unity 2018.3.14f1.
I just change player settings and then works fine
file -> build settings - > player settings -> Other Settings
Configuration
Scripting runtime version : .Net 4x equivalent
API Compatibility level: .Net 4x

Related

C# loop edit (I can't code, just trying to make a simple edit)

I need this pre-existing script to loop every forever, I do not know much code and am unable to edit this myself, I'm hoping one of you smart boys can assist in the editing of this script.
This script is C# built for a game called Space Engineers where it takes information from a solar panel to determine time of day/light level and respond by turning on the lights or turning off the lights.
//How to use:
//Setup a timer that triggers it self and this programmable block every 5-10 minutes. Then set the light strength(
//when the solar panel gets less kW's than that, the lights are turned on). Then set measureSolarPanelName to
//the customname of the solar panel that you want to use to measure the light. If you want to use a prefix, set
//usePrefix to true und set a prefix. When you set overwrite to true, this script will stop
//working until you deactivate overwrite(= false), for eg. battles.
//Thank you for using my script :)
//Settings:
int lightStrength = 10;
string measureSolarPanelName = "Solar Panel";
bool overwrite = false;
bool usePrefix = false;
string prefix = "[NL]";
//Code
public void Main(string argument) {
if (getSolarPower() < lightStrength) {
triggerLights(1);
} else {
triggerLights(0);
}
}
int getSolarPower () {
bool watts;
string info = GridTerminalSystem.GetBlockWithName(measureSolarPanelName).DetailedInfo;
var lines = info.Split('\n');
var output = lines[1].Split(':')[1].Trim();
var final = output.Replace(" kW", "");
if (final.Contains("W")) { final = final.Replace(" W", ""); watts = true; } else watts = false;
var finalb = final.Split ('.')[0];
if (watts) { finalb = "1"; }
return Int32.Parse(finalb);
}
void triggerLights(int status) {
if (status == 1 && !overwrite) {
List<IMyInteriorLight> lights = new List<IMyInteriorLight> ();
GridTerminalSystem.GetBlocksOfType<IMyInteriorLight>(lights);
List<IMyReflectorLight> spotlights = new List<IMyReflectorLight> ();
GridTerminalSystem.GetBlocksOfType<IMyReflectorLight>(spotlights);
for (int i = 0; i < lights.Count; i++) {
if (getPrefixBool(lights[i].CustomName)) {
lights[i].GetActionWithName ("OnOff_On").Apply(lights[i]);
}
}
for (int i = 0; i < spotlights.Count; i++) {
if (getPrefixBool(spotlights[i].CustomName)) {
spotlights[i].GetActionWithName("OnOff_On").Apply(spotlights[i]);
}
}
}
else
if (status == 0 && !overwrite) {
List<IMyInteriorLight> lights = new List<IMyInteriorLight> ();
GridTerminalSystem.GetBlocksOfType<IMyInteriorLight>(lights);
List<IMyReflectorLight> spotlights = new List<IMyReflectorLight> ();
GridTerminalSystem.GetBlocksOfType<IMyReflectorLight>(spotlights);
for (int i = 0; i < lights.Count; i++) {
if (getPrefixBool(lights[i].CustomName)) {
lights[i].GetActionWithName("OnOff_Off").Apply(lights[i]);
}
}
for (int i = 0; i < spotlights.Count; i++) {
if (getPrefixBool(spotlights[i].CustomName)) {
spotlights[i].GetActionWithName("OnOff_Off").Apply(spotlights[i]);
}
}
}
}
bool getPrefixBool (string name) {
if (usePrefix) {
if (name.StartsWith(prefix)) {
return true;
} else return false;
} else return true;
}
I haven't tried much at this point as I can't exactly code... I have found a couple lines but they don't seem to work.

Watch window CS0103 - follow up to exclude my code as problem

Problem after updating VS2022 viewing the watches. After an answer with proposal to post the code, I do so to exclude the problem is caused by my code prior to reporting a bug.
The original question: Watch window at debugging: CS0103: The name '' does not exists in the current context. Version and solution specifics are stated there.
The code is merely to get something on screen.
Things I tried afterwards:
Creating a new solution, adding a windows form app and windows controll library. Created a class with values.
No problem here,
Doing the same but, also a new solution, pasted the code of StringParser into the library and the relevant code of the constructor of Form1,
Again a new solution where the StringParser is part of only a windows forms app,
Both the same problem.
The image contains a screenshot of the watch window. Also from the code file to show debugging state. (The code is as formatted text below).
All projects are 'out-of-the-box'
Library - StrangParser.cs
namespace html
{
public enum Pin
{
Start,
End,
Both,
}
public class StringParser
{
private string content = "";
public string Content { get { return content; } set { if (content != value) content = value; if (content.Length > 0) { position = start = 0; end = 0; } } }
private int position = -1;
private int start = -1;
private int end = -1;
public bool Next()
{
++position;
if (position > content.Length)
position = content.Length;
if (position > end)
end = position;
return (position >= content.Length);
}
public bool Next(char to_char, bool include = true)
{
while (position < content.Length && content[position] != to_char)
{
++position;
if (position > end)
end = position;
}
if (include) ++position;
if (position > content.Length)
position = content.Length;
if (position > end)
end = position;
return (position >= content.Length);
}
public bool Previous()
{
--position;
if (position < 0)
position = 0;
if (position < start)
start = position;
return (position ==0);
}
public string Token
{
get
{
return start >= 0 && end <= content.Length && end > start
? content.Substring(start, end - start)
: "";
}
}
public void Pin(Pin pin)
{
if (pin == html.Pin.Start || pin == html.Pin.Both)
start = position;
if (pin == html.Pin.End || pin == html.Pin.Both)
end = position;
}
public override string ToString()
{
if (content == null || content == "")
return "";
string s = content.Substring(0, start);
string t = Token;
string e = content.Substring(end, content.Length - end);
if (s.Length > 15) s = "..." + s.Substring(s.Length - 15);
if (e.Length > 15) e = e.Substring(0, 15) + "...";
return string.Format("[{0}-{1}-{2}] {3} |--> '{4}' <--| {5}", start, position, end, s, t, e);
}
}
}
Form App - Form1.cs - code
using System.Windows.Forms;
using html;
namespace contentdownloader
{
public partial class Form1 : Form
{
string filecontent = "<html><head></head><body></body></html>";
StringParser watch_parser = null;
string watch_token = null;
public Form1()
{
InitializeComponent();
StringParser parser = new StringParser();
watch_parser = parser;
parser.Content = filecontent;
string token = "";
while (!parser.Next('<'))
{
//parser.Pin(html.Pin.Both);
parser.Next('>');
token = watch_token = parser.Token;
parser.Pin(html.Pin.Both);
}
}
}
}
You can change your code like this, define parser and token before the function InitializeComponent() of Form1:
public partial class Form1 : Form
{
string filecontent = "<html><head></head><body></body></html>";
StringParser parser = new StringParser();
string token = "";
public Form1()
{
InitializeComponent();
parser.Content = filecontent;
while (!parser.Next('<'))
{
parser.Next('>');
token = parser.Token;
}
}
}
Here is my test result:

SimpleAudioPlayer CurrentPosition and Duration are always 0

I'm trying to make a playlist in Xamarin Forms using a list of audios that can be added in a ListView through other ListViews.
I'm using the SimpleAudioPlayer plugin in order to reach this.
I would like to get multiple audios played consecutively.
The play function is working fine and the Stream doesn't throw any Exception.
Right now, the code plays always the same audio multiple times.
To reach the consecutive effect, I tried to make a while loop using CurrentPosition and Duration properties inside a for loop for each track in the playlist, but I didn't find any self esplicative documentation about these two properties.
using G.Models;
using Plugin.SimpleAudioPlayer;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.IO;
using System.Reflection;
using System.Windows.Input;
using Xamarin.Forms;
namespace G.ViewModels
{
class ABSViewModel : BaseViewModel{
public ObservableCollection<PlayListItem> Playlist { get; set; }
public ICommand PlayPlaylistCommand { get; set; }
private PlayListItem playlistSelectedItem;
public PlayListItem PlaylistSelectedItem
{
get {
return playlistSelectedItem;
}
set {
playlistSelectedItem = value;
}
}
public ABSViewModel()
{
audio = CrossSimpleAudioPlayer.CreateSimpleAudioPlayer();
PlayButtonText = "PLAY";
PlayButtonImage = "play_icon.png";
PlayButtonFlag = "1";
PlayPlaylistCommand = new Command(PlayPlaylist);
Playlist = new ObservableCollection<PlayListItem>();
}
private async void PlayPlaylist()
{
if (PlayButtonFlag == "1")
{
PlayButtonText = "PAUSE";
PlayButtonImage = "pause_icon.png";
PlayButtonFlag = "0";
int startPosition;
if(PlaylistSelectedItem != null)
{
startPosition = 0;
}
else
{
startPosition = Playlist.IndexOf(playlistSelectedItem);
}
for(int i = 0; i < Playlist.Count; i++)
{
if (audio.IsPlaying) audio.Stop();
PlaylistSelectedItem = Playlist[i];
OnPropertyChanged(nameof(Playlist));
audio.Load(GetStreamFromFile(Playlist[i].Name + ".wav"));
audio.Play();
while (audio.CurrentPosition != audio.Duration) {
Debug.WriteLine("Test");
}
}
}
else
{
PlayButtonText = "PLAY";
PlayButtonImage = "play_icon.png";
PlayButtonFlag = "1";
if (audio.IsPlaying) audio.Pause();
}
NotifyButtonChange();
}
Stream GetStreamFromFile(string filename)
{
var assembly = typeof(App).GetTypeInfo().Assembly;
var stream = assembly.GetManifestResourceStream("G2019." + "RSNZBile.wav");
return stream;
}
private void NotifyButtonChange()
{
OnPropertyChanged(nameof(PlayButtonText));
OnPropertyChanged(nameof(PlayButtonImage));
OnPropertyChanged(nameof(PlayButtonFlag));
}
}
}
I omit the Commands to add elements to the playlist or to stop the playlist from playing because I think they are out of context.
Now, when I press the "Play" button, I see in the debugger that my while cycle did not start at all. So, the player plays all the elements of the playlist in few milliseconds and the only one the human can hear is the last one. That is because my CurrentPosition and Duration properties are both set to 0. Why?
Did anyone try to do something like this?
If anyone is interested, I solved the problem using PlaybackEnded event.
Every piece of code that was inside the for loop was moved inside the PlaybackEnded handler function with no need of a while. The Duration value set to 0 is still a mystery.
private async void PlayPlaylist(string value)
{
if (PlayButtonMode == 1)
{
int startPosition;
//Playing playlist from selected item if user selected one. (first item if he didn't)
if (PlaylistSelectedItem == null) startPosition = 0;
else startPosition = Playlist.IndexOf(playlistSelectedItem);
if (audio.IsPlaying) audio.Stop();
PlaylistSelectedItem = Playlist[startPosition];
audio.Load(GetStreamFromFile(PlaylistSelectedItem.Name + ".wav"));
audio.Play();
audio.PlaybackEnded += Audio_PlaybackEnded;
}
else if (PlayButtonMode == 2)
audio.Play();
else
{
if (audio.IsPlaying) audio.Pause();
}
ChangeButtonMode(PlayButtonMode);
}
private void ChangeButtonMode(int senderMode)
{
if (senderMode == 1 || senderMode == 2)
{
PlayButtonText = "PAUSE";
PlayButtonImage = "pause_icon.png";
PlayButtonMode = 0;
}
else
{
PlayButtonText = "PLAY";
PlayButtonImage = "play_icon.png";
if (senderMode == 3) PlayButtonMode = 1;
else PlayButtonMode = 2;
}
NotifyButtonChange();
}
private void Audio_PlaybackEnded(object sender, EventArgs e)
{
//Check if next playlist element exists
if(Playlist.IndexOf(PlaylistSelectedItem) + 1 < Playlist.Count)
{
PlaylistSelectedItem = Playlist[Playlist.IndexOf(PlaylistSelectedItem) + 1];
if (audio.IsPlaying) audio.Stop();
audio.Load(GetStreamFromFile(PlaylistSelectedItem.Name + ".wav"));
audio.Play();
audio.PlaybackEnded += Audio_PlaybackEnded;
}else if(audio.IsPlaying) audio.Stop();
}
private void StopAudio()
{
if (audio.IsPlaying)
audio.Stop();
ChangeButtonMode(3);
}

How to make a speech-to-text custom model with Watson Unity SDK?

I have made an application, with Watson Assistant, speech-to-text and text to speech in Unity, where the user can say different cities to find available air plane tickets between said cities. The conversation and interactions are working great, but sometimes I have the problem that some cities aren't recognised when the user says them. For example Berlin, sometimes it understands Berlin and another time burning. The same goes for other cities like Paris, London and Jakarta.
So the detection of city names isn't always as accurate as I had hoped. But I saw in some posts that you can make your own custom model to improve the detection of those words. But I have no idea how to set that up, make an own custom model and how to add those cities to the model and train it. Is it possible to do that in Unity C# scripting and how would i start with it? Are there some C# examples that I can look at? Any help would be appreciated.
These are some links and information that I found, but have no idea of how to implement it in C# and for my own purpose in relation of improving the accuracy of city detection.
DwAnswers1 DwAnswers2 StackOverflow IBM clouds docs Medium cURL tutorial
This is the C# script I have for my interaction between the Watson API and Unity. I think i have to add the custom model in here too, but I don't know if i should create the custom model in it too, or if it needs to be in a seperate script.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using IBM.Watson.DeveloperCloud.Services.TextToSpeech.v1;
using IBM.Watson.DeveloperCloud.Services.Conversation.v1;
using IBM.Watson.DeveloperCloud.Services.ToneAnalyzer.v3;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.Connection;
using IBM.Watson.DeveloperCloud.DataTypes;
using MiniJSON;
using UnityEngine.UI;
using FullSerializer;
public class WatsonAgent : MonoBehaviour
{
public string literalEntityCity;
public string destinationCity;
public string departureCity;
public string dateBegin;
public string dateEnd;
public WeatherJSON weather;
public GameObject FlightInfo;
[SerializeField]
private fsSerializer _serializer = new fsSerializer();
[System.Serializable]
public class CredentialInformation
{
public string username, password, url;
}
[System.Serializable]
public class Services
{
public CredentialInformation
textToSpeech,
conversation,
speechToText;
}
[Header("Credentials")]
[Space]
public Services
serviceCredentials;
[Space]
[Header("Agent voice settings")]
[Space]
public AudioSource
voiceSource;
public VoiceType
voiceType;
[Space]
[Header("Conversation settings")]
[Space]
public string
workspaceId;
[Space]
[Header("Feedback fields")]
[Space]
public Text
speechToTextField;
public Text
conversationInputField;
public Text
conversationOutputField;
public string
saying;
// services
SpeechToText
speechToText;
private int
recordingRoutine = 0,
recordingBufferSize = 1,
recordingHZ = 22050;
private string
microphoneID = null;
private AudioClip
recording = null;
TextToSpeech
textToSpeech;
Conversation
conversation;
private Dictionary<string, object>
conversationContext = null;
private void Start()
{
PrepareCredentials();
Initialize();
}
void PrepareCredentials()
{
speechToText = new SpeechToText(GetCredentials(serviceCredentials.speechToText));
textToSpeech = new TextToSpeech(GetCredentials(serviceCredentials.textToSpeech));
conversation = new Conversation(GetCredentials(serviceCredentials.conversation));
}
Credentials GetCredentials(CredentialInformation credentialInformation)
{
return new Credentials(credentialInformation.username, credentialInformation.password, credentialInformation.url);
}
void Initialize()
{
conversation.VersionDate = "2017-05-26";
Active = true;
StartRecording();
}
// speech to text
public bool Active
{
get { return speechToText.IsListening; }
set
{
if (value && !speechToText.IsListening)
{
speechToText.DetectSilence = true;
speechToText.EnableWordConfidence = true;
speechToText.EnableTimestamps = true;
speechToText.SilenceThreshold = 0.01f;
speechToText.MaxAlternatives = 0;
speechToText.EnableInterimResults = true;
speechToText.OnError = OnSpeechError;
speechToText.InactivityTimeout = -1;
speechToText.ProfanityFilter = false;
speechToText.SmartFormatting = true;
speechToText.SpeakerLabels = false;
speechToText.WordAlternativesThreshold = null;
speechToText.StartListening(OnSpeechRecognize);
//speechToText.CustomizationId = "customID"; // I guess i have to add the custom training model here with the customID
//speechToText.CustomizationWeight(0.2); //
}
else if (!value && speechToText.IsListening)
{
speechToText.StopListening();
}
}
}
private void StartRecording()
{
if (recordingRoutine == 0)
{
UnityObjectUtil.StartDestroyQueue();
recordingRoutine = Runnable.Run(RecordingHandler());
}
}
private void StopRecording()
{
if (recordingRoutine != 0)
{
Microphone.End(microphoneID);
Runnable.Stop(recordingRoutine);
recordingRoutine = 0;
}
}
private void OnSpeechError(string error)
{
Active = false;
Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
}
private IEnumerator RecordingHandler()
{
recording = Microphone.Start(microphoneID, true, recordingBufferSize, recordingHZ);
yield return null; // let _recordingRoutine get set..
if (recording == null)
{
StopRecording();
yield break;
}
bool bFirstBlock = true;
int midPoint = recording.samples / 2;
float[] samples = null;
while (recordingRoutine != 0 && recording != null)
{
int writePos = Microphone.GetPosition(microphoneID);
if (writePos > recording.samples || !Microphone.IsRecording(microphoneID))
{
Debug.Log("Microphone disconnected.");
StopRecording();
yield break;
}
if ((bFirstBlock && writePos >= midPoint) || (!bFirstBlock && writePos < midPoint))
{
// front block is recorded, make a RecordClip and pass it onto our callback.
samples = new float[midPoint];
recording.GetData(samples, bFirstBlock ? 0 : midPoint);
AudioData record = new AudioData();
record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
record.Clip = AudioClip.Create("Recording", midPoint, recording.channels, recordingHZ, false);
record.Clip.SetData(samples, 0);
speechToText.OnListen(record);
bFirstBlock = !bFirstBlock;
}
else
{
// calculate the number of samples remaining until we ready for a block of audio,
// and wait that amount of time it will take to record.
int remaining = bFirstBlock ? (midPoint - writePos) : (recording.samples - writePos);
float timeRemaining = (float)remaining / (float)recordingHZ;
yield return new WaitForSeconds(timeRemaining);
}
}
yield break;
}
private void OnSpeechRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
{
if (result != null && result.results.Length > 0)
{
foreach (var res in result.results)
{
foreach (var alt in res.alternatives)
{
string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
if (speechToTextField != null)
{
speechToTextField.text = text;
}
if (res.final)
{
if (characterState == SocialState.listening)
{
Debug.Log("WATSON | Speech to text recorded: \n" + alt.transcript);
StartCoroutine(Message(alt.transcript));
}
}
else
{
if (characterState == SocialState.idle)
{
characterState = SocialState.listening;
}
}
}
}
}
}
// text to speech
private IEnumerator Synthesize(string text)
{
Debug.Log("WATSON CALL | Synthesize input: \n" + text);
textToSpeech.Voice = voiceType;
bool doSynthesize = textToSpeech.ToSpeech(HandleSynthesizeCallback, OnFail, text, true);
if (doSynthesize)
{
StartCoroutine(Analyze(text));
saying = text;
characterState = SocialState.talking;
}
yield return null;
}
void HandleSynthesizeCallback(AudioClip clip, Dictionary<string, object> customData = null)
{
if (Application.isPlaying && clip != null)
{
voiceSource.clip = clip;
voiceSource.Play();
}
}
// conversation
private IEnumerator Message(string text)
{
Debug.Log("WATSON | Conversation input: \n" + text);
MessageRequest messageRequest = new MessageRequest()
{
input = new Dictionary<string, object>()
{
{ "text", text }
},
context = conversationContext
};
bool doMessage = conversation.Message(HandleMessageCallback, OnFail, workspaceId, messageRequest);
if (doMessage)
{
characterState = SocialState.thinking;
if (conversationInputField != null)
{
conversationInputField.text = text;
}
}
yield return null;
}
void HandleMessageCallback(object resp, Dictionary<string, object> customData)
{
object _tempContext = null;
(resp as Dictionary<string, object>).TryGetValue("context", out _tempContext);
if (_tempContext != null)
conversationContext = _tempContext as Dictionary<string, object>;
string contextList = conversationContext.ToString();
Dictionary<string, object> dict = Json.Deserialize(customData["json"].ToString()) as Dictionary<string, object>;
Dictionary<string, object> output = dict["output"] as Dictionary<string, object>;
Debug.Log("JSON INFO: " + customData["json"].ToString());
// Send new/update context variables to the Watson Conversation Service
if (weather.temperatureCity != null && !conversationContext.ContainsKey("temperature"))
{
string currentTemperature = weather.temperatureNumber.ToString();
conversationContext.Add("temperature", currentTemperature);
}
else if (conversationContext.ContainsKey("temperature"))
{
string currentTemperature = weather.temperatureNumber.ToString();
conversationContext.Remove("temperature");
conversationContext.Add("temperature", currentTemperature);
//Debug.Log("Current Temperature: " + currentTemperature);
}
// $ call context variables
var context = dict["context"] as Dictionary<string, object>;
if (context["destination_city"] != null)
{
destinationCity = context["destination_city"].ToString();
Debug.Log("Destination city: " + destinationCity);
}
if (context["departure_city"] != null)
{
departureCity = context["departure_city"].ToString();
}
List<object> text = output["text"] as List<object>;
string answer = text[0].ToString(); //Geeft alleen de eerste response terug
Debug.Log("WATSON | Conversation output: \n" + answer);
if (conversationOutputField != null)
{
conversationOutputField.text = answer;
}
fsData fsdata = null;
fsResult r = _serializer.TrySerialize(resp.GetType(), resp, out fsdata);
if (!r.Succeeded)
{
throw new WatsonException(r.FormattedMessages);
}
//convert fsdata to MessageResponse
MessageResponse messageResponse = new MessageResponse();
object obj = messageResponse;
r = _serializer.TryDeserialize(fsdata, obj.GetType(), ref obj);
if (!r.Succeeded)
{
throw new WatsonException(r.FormattedMessages);
}
if (resp != null)
{
//Recognize intents & entities
if (messageResponse.intents.Length > 0 && messageResponse.entities.Length > 0)
{
string intent = messageResponse.intents[0].intent;
string entity = messageResponse.entities[0].entity;
string literalEntity = messageResponse.entities[0].value;
if (entity == "city")
{
literalEntityCity = literalEntity;
}
if (intent == "weather" && entity == "city")
{
literalEntityCity = literalEntity;
}
}
if (messageResponse.intents.Length > 0)
{
string intent = messageResponse.intents[0].intent;
//Debug.Log("Intent: " + intent); //intent name
}
if (messageResponse.entities.Length > 0)
{
string entity = messageResponse.entities[0].entity;
//Debug.Log("Entity: " + entity); //entity name
string literalEntity = messageResponse.entities[0].value;
//Debug.Log("Entity Literal: " + literalEntity); //literal spoken entity
if (entity == "city")
{
literalEntityCity = literalEntity;
}
}
}
StartCoroutine(Synthesize(answer));
}
}
The question you are asked is rather complex. I believe if you train a model, it should be using tools from Watson and nothing related with Unity.
But, what you can do in Unity is correcting the return word. That is, if you are expecting to get just names of cities, you can download a list of all the cities, let's say with more than 100.000 inhabitants (you can find this on Internet already), then you check if the returned word is in this list. For example:
http://download.geonames.org/export/dump/
In case it is not, you can consider it was poorly detected by Watson, so you can use something like Levenshtein distance to correct your returned word. Check this
Basically this algorithm tries to find how different two words are. It is possible to use other algorithms to check a given word, which is the most similar to it in a list. You may get some ideas from here or this other one

Noise artifacts during Naudio stream

I'm having some outputting my data as audio stream.
I've uploaded a sound snipped of a sine wave with the undesirable noise effect.
I think the issue is related that there is a small pause between data packages and somehow this produces this low, fairly regular pop sound
The AudioMonitor class gets its data via SetBuffer, each 250ms aprrox. ~ 6000 samples.
Where does the noise originate from exactly ?
What can be done to fix this ?
Edit *Seems to be an issue that the data isn't seamlessly coming in. After one package was played, there is a pause, the waveprovider idling and the clicking noise occurs.
Isn't it possible to just get a silence for the time there is no sample ready?*
Audacity view of the signal
using System;
using NAudio.Wave; // Version 1.7.0.13
namespace aws
{
public class AudioMonitor
{
private BufferedWaveProvider waveProvider;
private WaveOut wavePlayer;
int audioFilter = 0;
int sampleRate = 31250;
int resolution = 8;
private void CreateAudio()
{
if (wavePlayer != null)
{
StopAudio();
wavePlayer.Dispose();
wavePlayer = null;
waveProvider = null;
}
waveProvider = new BufferedWaveProvider(new WaveFormat(sampleRate, resolution, 1));
waveProvider.BufferLength = 31250; // standard value: 31250
wavePlayer = new WaveOut();
wavePlayer.DesiredLatency = 300; // standard value: 300
wavePlayer.NumberOfBuffers = 2; // standard value: 2
wavePlayer.Init(waveProvider);
}
public bool StartAudio()
{
CreateAudio();
if (wavePlayer != null)
{
wavePlayer.Play();
return true;
}
return false;
}
public void StopAudio()
{
if (wavePlayer != null)
{
wavePlayer.Stop();
}
}
public PlaybackState GetPlaybackState()
{
if (wavePlayer != null)
{
return wavePlayer.PlaybackState;
}
return PlaybackState.Stopped;
}
public void SetBuffer(byte[] b)
{
int indexWrite = 0, indexBuffer = 0, sizeBuffer;
byte[] nbuffer = b;
sizeBuffer = b.Length;
// valid data
for (indexWrite = indexBuffer; indexWrite < (indexBuffer + sizeBuffer); ++indexWrite)
{
nbuffer[indexWrite] = (Byte)((nbuffer[indexWrite]) + 128);
if ((nbuffer[indexWrite] < (128 + audioFilter)) && (nbuffer[indexWrite] > (128 - audioFilter)))
nbuffer[indexWrite] = 128;
}
try
{
waveProvider.AddSamples(nbuffer, indexBuffer, sizeBuffer);
}
catch (Exception e){
Console.WriteLine(e.StackTrace);
}
}
public void SetBuffer(double[] d, double fac)
{
byte[] tmpBuf = new byte[d.Length];
for(int i = 0; i < tmpBuf.Length; i++)
{
tmpBuf[i] = (byte)(d[i] / fac);
}
SetBuffer(tmpBuf);
}
public void SetAudioFilter(int f)
{
audioFilter = f*3; // min 0 max 60
}
public void Finalize()
{
if (wavePlayer != null)
{
wavePlayer.Stop();
wavePlayer.Dispose();
}
}
}
}

Categories

Resources