Noise artifacts during NAudio stream - C#

I'm having some issues outputting my data as an audio stream.
I've uploaded a sound snippet of a sine wave with the undesirable noise effect.
I think the issue is related to a small pause between data packages; somehow this produces a low, fairly regular pop sound.
The AudioMonitor class gets its data via SetBuffer, every 250 ms, approx. 6000 samples.
Where exactly does the noise originate?
What can be done to fix this?
Edit: It seems to be an issue that the data isn't coming in seamlessly. After one package has been played there is a pause, the wave provider idles, and the clicking noise occurs.
Isn't it possible to just get silence for the time there is no sample ready?
Audacity view of the signal
using System;
using NAudio.Wave; // Version 1.7.0.13

namespace aws
{
    public class AudioMonitor
    {
        private BufferedWaveProvider waveProvider;
        private WaveOut wavePlayer;
        int audioFilter = 0;
        int sampleRate = 31250;
        int resolution = 8; // bits per sample; 8-bit PCM is unsigned, silence = 128

        private void CreateAudio()
        {
            if (wavePlayer != null)
            {
                StopAudio();
                wavePlayer.Dispose();
                wavePlayer = null;
                waveProvider = null;
            }
            waveProvider = new BufferedWaveProvider(new WaveFormat(sampleRate, resolution, 1));
            waveProvider.BufferLength = 31250; // standard value: 31250 (one second of audio)
            wavePlayer = new WaveOut();
            wavePlayer.DesiredLatency = 300; // standard value: 300
            wavePlayer.NumberOfBuffers = 2;  // standard value: 2
            wavePlayer.Init(waveProvider);
        }

        public bool StartAudio()
        {
            CreateAudio();
            if (wavePlayer != null)
            {
                wavePlayer.Play();
                return true;
            }
            return false;
        }

        public void StopAudio()
        {
            if (wavePlayer != null)
            {
                wavePlayer.Stop();
            }
        }

        public PlaybackState GetPlaybackState()
        {
            if (wavePlayer != null)
            {
                return wavePlayer.PlaybackState;
            }
            return PlaybackState.Stopped;
        }

        public void SetBuffer(byte[] b)
        {
            int indexWrite = 0, indexBuffer = 0, sizeBuffer;
            byte[] nbuffer = b; // note: aliases b, so the caller's array is modified in place
            sizeBuffer = b.Length;
            // valid data: shift signed samples to unsigned 8-bit (centered at 128)
            // and apply a simple noise gate around the midpoint
            for (indexWrite = indexBuffer; indexWrite < (indexBuffer + sizeBuffer); ++indexWrite)
            {
                nbuffer[indexWrite] = (byte)((nbuffer[indexWrite]) + 128);
                if ((nbuffer[indexWrite] < (128 + audioFilter)) && (nbuffer[indexWrite] > (128 - audioFilter)))
                    nbuffer[indexWrite] = 128;
            }
            try
            {
                waveProvider.AddSamples(nbuffer, indexBuffer, sizeBuffer);
            }
            catch (Exception e)
            {
                Console.WriteLine(e.StackTrace);
            }
        }

        public void SetBuffer(double[] d, double fac)
        {
            byte[] tmpBuf = new byte[d.Length];
            for (int i = 0; i < tmpBuf.Length; i++)
            {
                tmpBuf[i] = (byte)(d[i] / fac);
            }
            SetBuffer(tmpBuf);
        }

        public void SetAudioFilter(int f)
        {
            audioFilter = f * 3; // min 0, max 60
        }

        // Hides Object.Finalize (compiler warning CS0465); implementing
        // IDisposable would be the more idiomatic cleanup pattern here.
        public void Finalize()
        {
            if (wavePlayer != null)
            {
                wavePlayer.Stop();
                wavePlayer.Dispose();
            }
        }
    }
}
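A guess at where the pop comes from, plus a sketch: with an 8-bit WaveFormat, digital silence is 0x80 (128), but as far as I can tell BufferedWaveProvider pads an underrun with zero bytes, and in unsigned 8-bit PCM a zero byte is a full negative swing, which would produce exactly this kind of regular pop whenever a package arrives late. A minimal, untested workaround is to switch to 16-bit samples, where zero padding really is silence (this assumes the incoming bytes are signed 8-bit samples, as the +128 shift in SetBuffer suggests, and that waveProvider is created with new WaveFormat(sampleRate, 16, 1)):

// Sketch (untested): feed 16-bit PCM instead of 8-bit, so that the zero
// bytes BufferedWaveProvider pads with on underrun decode as true silence.
public void SetBuffer16(byte[] b)
{
    byte[] pcm16 = new byte[b.Length * 2];
    for (int i = 0; i < b.Length; i++)
    {
        short s = (short)(((sbyte)b[i]) * 256);     // scale signed 8-bit to 16-bit
        pcm16[2 * i] = (byte)(s & 0xFF);            // little-endian: low byte first
        pcm16[2 * i + 1] = (byte)((s >> 8) & 0xFF); // high byte
    }
    waveProvider.AddSamples(pcm16, 0, pcm16.Length);
}

If the format has to stay 8-bit, the alternative would be to make sure the provider never runs dry, e.g. by queueing a packet of 128-valued bytes whenever the buffered duration falls below one incoming packet length.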

Related

How to increase performance of Alpha-Beta-Pruning without increasing tree depth?

I am programming a Nine Men's Morris game in WPF and it is almost finished. I want to make the AI harder to beat, but I don't know how. Is there a way to improve it without increasing the tree depth? I don't want to increase the "thinking time".
Here is the source code of the Alpha-Beta AI:
using Młynek.Model;
using Młynek.Utils;
using System;
using System.Diagnostics;
using System.Threading.Tasks;
namespace Młynek.Players
{
class AlfaBetaSpieler : ISpieler
{
private FieldState _color;
private IEvaluateGameState _evaluator;
private AIHelper _aiHelper;
private Spiel _game;
private Spielzug _pendingCapture;
private long _nodesCount;
private int _depth;
public bool IsHuman => false;
public string TypeName => "AlphaBeta";
public AlfaBetaSpieler(FieldState color, int depth, GameStateEvaluator evaluator)
{
_color = color;
_evaluator = evaluator;
_depth = depth;
_aiHelper = new AIHelper();
}
public async Task<Spielzug> AIMove(Spiel game)
{
_nodesCount = 0;
Stopwatch timer = Stopwatch.StartNew();
Spielzug nextMove = await Task.Run(() =>
{
_game = game.Duplicate();
return AlfaBeta();
});
if (nextMove.Capture >= 0) _pendingCapture = new Spielzug(_color, nextMove.Capture);
timer.Stop();
nextMove.Time = timer.ElapsedMilliseconds;
nextMove.NodesVisited = _nodesCount;
return nextMove;
}
public Spielzug AICapture(Spiel game)
{
if (!_pendingCapture.IsValid()) throw new InvalidOperationException();
Spielzug capture = _pendingCapture;
_pendingCapture = new Spielzug();
return capture;
}
public Spielzug AlfaBeta()
{
int depth = _game.GetRound() == 2 ? _depth : 3;
Spielzug nextMove = AlfaBetaRecursion(depth, float.MinValue, float.MaxValue).Item2;
_game = null;
return nextMove;
}
public (float, Spielzug) AlfaBetaRecursion(int depth, float alfa, float beta)
{
_nodesCount++;
if (depth == 0 || _game.GameEnded)
{
return (_evaluator.EvaluateGameState(_game, _color), new Spielzug(FieldState.Empty));
}
Spielzug[] possibleMoves = _aiHelper.GetAvaiableMoves(_game);
//GIVE-UP PSEUDO MOVE
if (possibleMoves.Length == 0)
{
Spielzug giveUp = new Spielzug(_game.NextPlayer);
_game.MakeMove(giveUp);
return ((AlfaBetaRecursion(depth - 1, alfa, beta).Item1, giveUp));
}
// MAXIMIZING PLAYER
if (_game.NextPlayer == _color)
{
var max = (Item1: float.MinValue, new Spielzug(FieldState.Empty));
for (int i = 0; i < possibleMoves.Length; i++)
{
Spielzug move = possibleMoves[i];
_game.MakeMove(move);
if (move.CreatesMill) _game.Capture(new Spielzug(move.Player, move.Capture));
if (max.Item1 >= alfa) alfa = max.Item1;
var eval = ((AlfaBetaRecursion(depth - 1, alfa, beta).Item1, move));
_game.Undo();
if (eval.Item1 >= beta) return eval;
if (eval.Item1 >= max.Item1) max = eval;
}
return max;
}
// MINIMIZING PLAYER
else
{
var min = (Item1: float.MaxValue, new Spielzug(FieldState.Empty));
for (int i = 0; i < possibleMoves.Length; i++)
{
Spielzug move = possibleMoves[i];
_game.MakeMove(move);
if (move.CreatesMill) _game.Capture(new Spielzug(move.Player, move.Capture));
if (min.Item1 <= beta) beta = min.Item1;
var eval = ((AlfaBetaRecursion(depth - 1, alfa, beta).Item1, move));
_game.Undo();
if (eval.Item1 <= alfa) return eval;
if (eval.Item1 <= min.Item1) min = eval;
}
return min;
}
}
public void Move(Spiel game)
{
throw new NotImplementedException();
}
public void Capture(Spiel game)
{
throw new NotImplementedException();
}
}
}
This AI works, but it isn't efficient enough for me.
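One standard way to strengthen alpha-beta without searching deeper is move ordering: the earlier a good move is tried, the sooner alpha/beta cutoffs occur, so the same depth visits far fewer nodes. A minimal sketch against the code above (Spielzug.CreatesMill comes from the question; the scoring heuristic is a placeholder of mine, and it needs using System.Linq):

// Try promising moves first; mill-creating moves are an obvious candidate.
// Better ordering means earlier cutoffs and a smaller searched tree.
private Spielzug[] OrderMoves(Spielzug[] moves)
{
    return moves.OrderByDescending(m => m.CreatesMill ? 1 : 0).ToArray();
}

// In AlfaBetaRecursion, instead of:
//   Spielzug[] possibleMoves = _aiHelper.GetAvaiableMoves(_game);
// use:
//   Spielzug[] possibleMoves = OrderMoves(_aiHelper.GetAvaiableMoves(_game));

Transposition tables and iterative deepening are the usual next steps, but better ordering alone often pays for itself immediately.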

SimpleAudioPlayer CurrentPosition and Duration are always 0

I'm trying to make a playlist in Xamarin.Forms using a list of audio tracks that can be added to a ListView from other ListViews.
I'm using the SimpleAudioPlayer plugin to achieve this.
I would like to get multiple audio tracks played consecutively.
The play function is working fine and the Stream doesn't throw any exception.
Right now, the code always plays the same audio multiple times.
To achieve the consecutive effect, I tried to make a while loop using the CurrentPosition and Duration properties inside a for loop over each track in the playlist, but I didn't find any self-explanatory documentation about these two properties.
using G.Models;
using Plugin.SimpleAudioPlayer;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.IO;
using System.Reflection;
using System.Windows.Input;
using Xamarin.Forms;
namespace G.ViewModels
{
class ABSViewModel : BaseViewModel{
public ObservableCollection<PlayListItem> Playlist { get; set; }
public ICommand PlayPlaylistCommand { get; set; }
private PlayListItem playlistSelectedItem;
public PlayListItem PlaylistSelectedItem
{
get {
return playlistSelectedItem;
}
set {
playlistSelectedItem = value;
}
}
public ABSViewModel()
{
audio = CrossSimpleAudioPlayer.CreateSimpleAudioPlayer();
PlayButtonText = "PLAY";
PlayButtonImage = "play_icon.png";
PlayButtonFlag = "1";
PlayPlaylistCommand = new Command(PlayPlaylist);
Playlist = new ObservableCollection<PlayListItem>();
}
private async void PlayPlaylist()
{
if (PlayButtonFlag == "1")
{
PlayButtonText = "PAUSE";
PlayButtonImage = "pause_icon.png";
PlayButtonFlag = "0";
int startPosition;
if(PlaylistSelectedItem != null)
{
startPosition = 0;
}
else
{
startPosition = Playlist.IndexOf(playlistSelectedItem);
}
for(int i = 0; i < Playlist.Count; i++)
{
if (audio.IsPlaying) audio.Stop();
PlaylistSelectedItem = Playlist[i];
OnPropertyChanged(nameof(Playlist));
audio.Load(GetStreamFromFile(Playlist[i].Name + ".wav"));
audio.Play();
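// Note: this comparison runs on the UI thread; since CurrentPosition and
// Duration are both 0 here, the condition is false immediately and the
// loop body never executes (matching the behaviour described below).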
while (audio.CurrentPosition != audio.Duration) {
Debug.WriteLine("Test");
}
}
}
else
{
PlayButtonText = "PLAY";
PlayButtonImage = "play_icon.png";
PlayButtonFlag = "1";
if (audio.IsPlaying) audio.Pause();
}
NotifyButtonChange();
}
Stream GetStreamFromFile(string filename)
{
var assembly = typeof(App).GetTypeInfo().Assembly;
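// NOTE: the filename parameter is ignored; every call returns the same
// embedded resource ("RSNZBile.wav"), which matches the symptom of the
// same audio playing for every track.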
var stream = assembly.GetManifestResourceStream("G2019." + "RSNZBile.wav");
return stream;
}
private void NotifyButtonChange()
{
OnPropertyChanged(nameof(PlayButtonText));
OnPropertyChanged(nameof(PlayButtonImage));
OnPropertyChanged(nameof(PlayButtonFlag));
}
}
}
I omitted the commands that add elements to the playlist or stop it from playing, because I think they are out of context.
Now, when I press the "Play" button, I see in the debugger that my while loop does not run at all. So the player steps through all the elements of the playlist in a few milliseconds, and the only one a human can hear is the last one. That is because my CurrentPosition and Duration properties are both 0. Why?
Has anyone tried to do something like this?
If anyone is interested, I solved the problem using the PlaybackEnded event.
Every piece of code that was inside the for loop was moved into the PlaybackEnded handler, with no need for a while loop. The Duration value being 0 is still a mystery.
private async void PlayPlaylist(string value)
{
if (PlayButtonMode == 1)
{
int startPosition;
//Playing playlist from selected item if user selected one. (first item if he didn't)
if (PlaylistSelectedItem == null) startPosition = 0;
else startPosition = Playlist.IndexOf(playlistSelectedItem);
if (audio.IsPlaying) audio.Stop();
PlaylistSelectedItem = Playlist[startPosition];
audio.Load(GetStreamFromFile(PlaylistSelectedItem.Name + ".wav"));
audio.Play();
audio.PlaybackEnded += Audio_PlaybackEnded; // note: if PlayPlaylist can run more than once, guard this (or move it to the constructor) to avoid stacking duplicate handlers
}
else if (PlayButtonMode == 2)
audio.Play();
else
{
if (audio.IsPlaying) audio.Pause();
}
ChangeButtonMode(PlayButtonMode);
}
private void ChangeButtonMode(int senderMode)
{
if (senderMode == 1 || senderMode == 2)
{
PlayButtonText = "PAUSE";
PlayButtonImage = "pause_icon.png";
PlayButtonMode = 0;
}
else
{
PlayButtonText = "PLAY";
PlayButtonImage = "play_icon.png";
if (senderMode == 3) PlayButtonMode = 1;
else PlayButtonMode = 2;
}
NotifyButtonChange();
}
private void Audio_PlaybackEnded(object sender, EventArgs e)
{
//Check if next playlist element exists
if(Playlist.IndexOf(PlaylistSelectedItem) + 1 < Playlist.Count)
{
PlaylistSelectedItem = Playlist[Playlist.IndexOf(PlaylistSelectedItem) + 1];
if (audio.IsPlaying) audio.Stop();
audio.Load(GetStreamFromFile(PlaylistSelectedItem.Name + ".wav"));
audio.Play();
// no resubscription here: the handler attached in PlayPlaylist is still
// attached; adding it again on every track would stack duplicate handlers
// and fire this method several times per PlaybackEnded
}else if(audio.IsPlaying) audio.Stop();
}
private void StopAudio()
{
if (audio.IsPlaying)
audio.Stop();
ChangeButtonMode(3);
}

How to make a speech-to-text custom model with Watson Unity SDK?

I have made an application with Watson Assistant, speech-to-text, and text-to-speech in Unity, where the user can say different cities to find available airplane tickets between those cities. The conversation and interactions are working great, but sometimes I have the problem that some cities aren't recognized when the user says them. For example Berlin: sometimes it understands Berlin, and other times "burning". The same goes for other cities like Paris, London, and Jakarta.
So the detection of city names isn't always as accurate as I had hoped. I saw in some posts that you can make your own custom model to improve the detection of those words, but I have no idea how to set that up: how to make my own custom model, how to add those cities to it, and how to train it. Is it possible to do that in Unity C# scripting, and how would I start with it? Are there some C# examples that I can look at? Any help would be appreciated.
These are some links and information that I found, but I have no idea how to implement them in C# for my own purpose of improving the accuracy of city detection:
DwAnswers1 DwAnswers2 StackOverflow IBM clouds docs Medium cURL tutorial
This is the C# script I have for the interaction between the Watson API and Unity. I think I have to add the custom model in here too, but I don't know whether I should create the custom model in this script as well, or whether it needs to be in a separate script.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using IBM.Watson.DeveloperCloud.Services.TextToSpeech.v1;
using IBM.Watson.DeveloperCloud.Services.Conversation.v1;
using IBM.Watson.DeveloperCloud.Services.ToneAnalyzer.v3;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.Connection;
using IBM.Watson.DeveloperCloud.DataTypes;
using MiniJSON;
using UnityEngine.UI;
using FullSerializer;
public class WatsonAgent : MonoBehaviour
{
public string literalEntityCity;
public string destinationCity;
public string departureCity;
public string dateBegin;
public string dateEnd;
public WeatherJSON weather;
public GameObject FlightInfo;
[SerializeField]
private fsSerializer _serializer = new fsSerializer();
[System.Serializable]
public class CredentialInformation
{
public string username, password, url;
}
[System.Serializable]
public class Services
{
public CredentialInformation
textToSpeech,
conversation,
speechToText;
}
[Header("Credentials")]
[Space]
public Services
serviceCredentials;
[Space]
[Header("Agent voice settings")]
[Space]
public AudioSource
voiceSource;
public VoiceType
voiceType;
[Space]
[Header("Conversation settings")]
[Space]
public string
workspaceId;
[Space]
[Header("Feedback fields")]
[Space]
public Text
speechToTextField;
public Text
conversationInputField;
public Text
conversationOutputField;
public string
saying;
// services
SpeechToText
speechToText;
private int
recordingRoutine = 0,
recordingBufferSize = 1,
recordingHZ = 22050;
private string
microphoneID = null;
private AudioClip
recording = null;
TextToSpeech
textToSpeech;
Conversation
conversation;
private Dictionary<string, object>
conversationContext = null;
private void Start()
{
PrepareCredentials();
Initialize();
}
void PrepareCredentials()
{
speechToText = new SpeechToText(GetCredentials(serviceCredentials.speechToText));
textToSpeech = new TextToSpeech(GetCredentials(serviceCredentials.textToSpeech));
conversation = new Conversation(GetCredentials(serviceCredentials.conversation));
}
Credentials GetCredentials(CredentialInformation credentialInformation)
{
return new Credentials(credentialInformation.username, credentialInformation.password, credentialInformation.url);
}
void Initialize()
{
conversation.VersionDate = "2017-05-26";
Active = true;
StartRecording();
}
// speech to text
public bool Active
{
get { return speechToText.IsListening; }
set
{
if (value && !speechToText.IsListening)
{
speechToText.DetectSilence = true;
speechToText.EnableWordConfidence = true;
speechToText.EnableTimestamps = true;
speechToText.SilenceThreshold = 0.01f;
speechToText.MaxAlternatives = 0;
speechToText.EnableInterimResults = true;
speechToText.OnError = OnSpeechError;
speechToText.InactivityTimeout = -1;
speechToText.ProfanityFilter = false;
speechToText.SmartFormatting = true;
speechToText.SpeakerLabels = false;
speechToText.WordAlternativesThreshold = null;
speechToText.StartListening(OnSpeechRecognize);
//speechToText.CustomizationId = "customID"; // I guess i have to add the custom training model here with the customID
//speechToText.CustomizationWeight(0.2); //
}
else if (!value && speechToText.IsListening)
{
speechToText.StopListening();
}
}
}
private void StartRecording()
{
if (recordingRoutine == 0)
{
UnityObjectUtil.StartDestroyQueue();
recordingRoutine = Runnable.Run(RecordingHandler());
}
}
private void StopRecording()
{
if (recordingRoutine != 0)
{
Microphone.End(microphoneID);
Runnable.Stop(recordingRoutine);
recordingRoutine = 0;
}
}
private void OnSpeechError(string error)
{
Active = false;
Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
}
private IEnumerator RecordingHandler()
{
recording = Microphone.Start(microphoneID, true, recordingBufferSize, recordingHZ);
yield return null; // let _recordingRoutine get set..
if (recording == null)
{
StopRecording();
yield break;
}
bool bFirstBlock = true;
int midPoint = recording.samples / 2;
float[] samples = null;
while (recordingRoutine != 0 && recording != null)
{
int writePos = Microphone.GetPosition(microphoneID);
if (writePos > recording.samples || !Microphone.IsRecording(microphoneID))
{
Debug.Log("Microphone disconnected.");
StopRecording();
yield break;
}
if ((bFirstBlock && writePos >= midPoint) || (!bFirstBlock && writePos < midPoint))
{
// front block is recorded, make a RecordClip and pass it onto our callback.
samples = new float[midPoint];
recording.GetData(samples, bFirstBlock ? 0 : midPoint);
AudioData record = new AudioData();
record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
record.Clip = AudioClip.Create("Recording", midPoint, recording.channels, recordingHZ, false);
record.Clip.SetData(samples, 0);
speechToText.OnListen(record);
bFirstBlock = !bFirstBlock;
}
else
{
// calculate the number of samples remaining until we ready for a block of audio,
// and wait that amount of time it will take to record.
int remaining = bFirstBlock ? (midPoint - writePos) : (recording.samples - writePos);
float timeRemaining = (float)remaining / (float)recordingHZ;
yield return new WaitForSeconds(timeRemaining);
}
}
yield break;
}
private void OnSpeechRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
{
if (result != null && result.results.Length > 0)
{
foreach (var res in result.results)
{
foreach (var alt in res.alternatives)
{
string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
if (speechToTextField != null)
{
speechToTextField.text = text;
}
if (res.final)
{
if (characterState == SocialState.listening)
{
Debug.Log("WATSON | Speech to text recorded: \n" + alt.transcript);
StartCoroutine(Message(alt.transcript));
}
}
else
{
if (characterState == SocialState.idle)
{
characterState = SocialState.listening;
}
}
}
}
}
}
// text to speech
private IEnumerator Synthesize(string text)
{
Debug.Log("WATSON CALL | Synthesize input: \n" + text);
textToSpeech.Voice = voiceType;
bool doSynthesize = textToSpeech.ToSpeech(HandleSynthesizeCallback, OnFail, text, true);
if (doSynthesize)
{
StartCoroutine(Analyze(text));
saying = text;
characterState = SocialState.talking;
}
yield return null;
}
void HandleSynthesizeCallback(AudioClip clip, Dictionary<string, object> customData = null)
{
if (Application.isPlaying && clip != null)
{
voiceSource.clip = clip;
voiceSource.Play();
}
}
// conversation
private IEnumerator Message(string text)
{
Debug.Log("WATSON | Conversation input: \n" + text);
MessageRequest messageRequest = new MessageRequest()
{
input = new Dictionary<string, object>()
{
{ "text", text }
},
context = conversationContext
};
bool doMessage = conversation.Message(HandleMessageCallback, OnFail, workspaceId, messageRequest);
if (doMessage)
{
characterState = SocialState.thinking;
if (conversationInputField != null)
{
conversationInputField.text = text;
}
}
yield return null;
}
void HandleMessageCallback(object resp, Dictionary<string, object> customData)
{
object _tempContext = null;
(resp as Dictionary<string, object>).TryGetValue("context", out _tempContext);
if (_tempContext != null)
conversationContext = _tempContext as Dictionary<string, object>;
string contextList = conversationContext.ToString();
Dictionary<string, object> dict = Json.Deserialize(customData["json"].ToString()) as Dictionary<string, object>;
Dictionary<string, object> output = dict["output"] as Dictionary<string, object>;
Debug.Log("JSON INFO: " + customData["json"].ToString());
// Send new/update context variables to the Watson Conversation Service
if (weather.temperatureCity != null && !conversationContext.ContainsKey("temperature"))
{
string currentTemperature = weather.temperatureNumber.ToString();
conversationContext.Add("temperature", currentTemperature);
}
else if (conversationContext.ContainsKey("temperature"))
{
string currentTemperature = weather.temperatureNumber.ToString();
conversationContext.Remove("temperature");
conversationContext.Add("temperature", currentTemperature);
//Debug.Log("Current Temperature: " + currentTemperature);
}
// $ call context variables
var context = dict["context"] as Dictionary<string, object>;
if (context["destination_city"] != null)
{
destinationCity = context["destination_city"].ToString();
Debug.Log("Destination city: " + destinationCity);
}
if (context["departure_city"] != null)
{
departureCity = context["departure_city"].ToString();
}
List<object> text = output["text"] as List<object>;
string answer = text[0].ToString(); //Geeft alleen de eerste response terug
Debug.Log("WATSON | Conversation output: \n" + answer);
if (conversationOutputField != null)
{
conversationOutputField.text = answer;
}
fsData fsdata = null;
fsResult r = _serializer.TrySerialize(resp.GetType(), resp, out fsdata);
if (!r.Succeeded)
{
throw new WatsonException(r.FormattedMessages);
}
//convert fsdata to MessageResponse
MessageResponse messageResponse = new MessageResponse();
object obj = messageResponse;
r = _serializer.TryDeserialize(fsdata, obj.GetType(), ref obj);
if (!r.Succeeded)
{
throw new WatsonException(r.FormattedMessages);
}
if (resp != null)
{
//Recognize intents & entities
if (messageResponse.intents.Length > 0 && messageResponse.entities.Length > 0)
{
string intent = messageResponse.intents[0].intent;
string entity = messageResponse.entities[0].entity;
string literalEntity = messageResponse.entities[0].value;
if (entity == "city")
{
literalEntityCity = literalEntity;
}
if (intent == "weather" && entity == "city")
{
literalEntityCity = literalEntity;
}
}
if (messageResponse.intents.Length > 0)
{
string intent = messageResponse.intents[0].intent;
//Debug.Log("Intent: " + intent); //intent name
}
if (messageResponse.entities.Length > 0)
{
string entity = messageResponse.entities[0].entity;
//Debug.Log("Entity: " + entity); //entity name
string literalEntity = messageResponse.entities[0].value;
//Debug.Log("Entity Literal: " + literalEntity); //literal spoken entity
if (entity == "city")
{
literalEntityCity = literalEntity;
}
}
}
StartCoroutine(Synthesize(answer));
}
}
The question you are asking is rather complex. I believe that if you train a model, it should be done using tools from Watson, and nothing related to Unity.
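To make that concrete, the Speech to Text customization interface is a plain REST API, so the model can be created and trained entirely outside Unity. Below is a rough HttpClient sketch; the endpoint paths and JSON field names follow the IBM Cloud docs linked in the question as I remember them, and the credentials, model name, and {id} placeholder are all mine to fill in, so verify everything against the current docs:

using System;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

class CustomModelSketch
{
    static async Task Main()
    {
        var client = new HttpClient();
        // Basic auth with the service credentials (the same username/password
        // pair the Unity script above uses)
        var token = Convert.ToBase64String(Encoding.ASCII.GetBytes("username:password"));
        client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Basic", token);

        string baseUrl = "https://stream.watsonplatform.net/speech-to-text/api/v1";

        // 1) create a custom language model on top of a base model
        var create = await client.PostAsync(baseUrl + "/customizations",
            new StringContent("{\"name\":\"cities\",\"base_model_name\":\"en-US_BroadbandModel\"}",
                Encoding.UTF8, "application/json"));
        Console.WriteLine(await create.Content.ReadAsStringAsync()); // contains customization_id

        // 2) add the problematic city names as custom words ({id} = the returned customization_id)
        await client.PostAsync(baseUrl + "/customizations/{id}/words",
            new StringContent("{\"words\":[{\"word\":\"Berlin\"},{\"word\":\"Jakarta\"}]}",
                Encoding.UTF8, "application/json"));

        // 3) start training the model with the new words
        await client.PostAsync(baseUrl + "/customizations/{id}/train",
            new StringContent("", Encoding.UTF8, "application/json"));
    }
}

Once the model reports itself as trained, pointing the commented-out speechToText.CustomizationId line in the Unity script at the returned customization_id should be enough to use it for recognition.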
But what you can do in Unity is correct the returned word. That is, if you are expecting to get only names of cities, you can download a list of all the cities, say those with more than 100,000 inhabitants (you can already find this on the Internet), and then check whether the returned word is in this list. For example:
http://download.geonames.org/export/dump/
In case it is not, you can assume it was poorly detected by Watson, and use something like the Levenshtein distance to correct the returned word. Check this.
Basically, this algorithm measures how different two words are. It is also possible to use other algorithms to find, for a given word, the most similar word in a list. You may get some ideas from here or this other one.
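To make that last suggestion concrete, here is a minimal sketch (the classic dynamic-programming edit distance, no external libraries; the CityCorrector name and the example cities are mine):

using System;
using System.Linq;

static class CityCorrector
{
    // Classic dynamic-programming edit distance between two strings.
    static int Levenshtein(string a, string b)
    {
        var d = new int[a.Length + 1, b.Length + 1];
        for (int i = 0; i <= a.Length; i++) d[i, 0] = i;
        for (int j = 0; j <= b.Length; j++) d[0, j] = j;
        for (int i = 1; i <= a.Length; i++)
        {
            for (int j = 1; j <= b.Length; j++)
            {
                int cost = a[i - 1] == b[j - 1] ? 0 : 1;
                d[i, j] = Math.Min(Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
                                   d[i - 1, j - 1] + cost);
            }
        }
        return d[a.Length, b.Length];
    }

    // e.g. Closest("burning", new[] { "Berlin", "Paris", "London" }) -> "Berlin"
    public static string Closest(string word, string[] cities)
    {
        return cities.OrderBy(c => Levenshtein(word.ToLower(), c.ToLower())).First();
    }
}

With the small list in the comment, "burning" comes out as "Berlin"; with a full geonames dump you would probably want to cap the accepted distance, so that genuinely unknown words are rejected rather than force-matched to some city.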

IBM Watson Speech to Text service is not giving a response in Unity3D

I have an ExampleStreaming class which I got from the IBM Watson SDK's GitHub repository (speech-to-text service demo). Here it is:
public class ExampleStreaming : MonoBehaviour
{
private int m_RecordingRoutine = 0;
private string m_MicrophoneID = null;
private AudioClip m_Recording = null;
private int m_RecordingBufferSize = 5;
private int m_RecordingHZ = 22050;
private SpeechToText m_SpeechToText = new SpeechToText();
void Start()
{
LogSystem.InstallDefaultReactors();
Log.Debug("ExampleStreaming", "Start();");
Active = true;
Debug.Log("start");
StartRecording();
}
public void Update() {
Debug.Log(m_SpeechToText.IsListening);
}
public bool Active
{
get { return m_SpeechToText.IsListening; }
set
{
if (value && !m_SpeechToText.IsListening)
{
m_SpeechToText.DetectSilence = true;
m_SpeechToText.EnableWordConfidence = false;
m_SpeechToText.EnableTimestamps = false;
m_SpeechToText.SilenceThreshold = 0.03f;
m_SpeechToText.MaxAlternatives = 1;
m_SpeechToText.EnableContinousRecognition = true;
m_SpeechToText.EnableInterimResults = true;
m_SpeechToText.OnError = OnError;
m_SpeechToText.StartListening(OnRecognize);
}
else if (!value && m_SpeechToText.IsListening)
{
m_SpeechToText.StopListening();
}
}
}
private void StartRecording()
{
if (m_RecordingRoutine == 0)
{
Debug.Log("m_RecordingRoutine");
UnityObjectUtil.StartDestroyQueue();
m_RecordingRoutine = Runnable.Run(RecordingHandler());
}
}
private void StopRecording()
{
if (m_RecordingRoutine != 0)
{
Microphone.End(m_MicrophoneID);
Runnable.Stop(m_RecordingRoutine);
m_RecordingRoutine = 0;
}
}
private void OnError(string error)
{
Active = false;
Log.Debug("ExampleStreaming", "Error! {0}", error);
}
private IEnumerator RecordingHandler()
{
Log.Debug("ExampleStreaming", "devices: {0}", Microphone.devices);
m_MicrophoneID = Microphone.devices[0];
Debug.Log("m_MicrophoneID : " + m_MicrophoneID);
m_Recording = Microphone.Start(m_MicrophoneID, true, m_RecordingBufferSize, m_RecordingHZ);
yield return null; // let m_RecordingRoutine get set..
Debug.Log("m_Recording : " + m_Recording.length);
if (m_Recording == null)
{
Debug.Log("m_Recording is null");
StopRecording();
yield break;
}
bool bFirstBlock = true;
int midPoint = m_Recording.samples / 2;
float[] samples = null;
while (m_RecordingRoutine != 0 && m_Recording != null)
{
int writePos = Microphone.GetPosition(m_MicrophoneID);
if (writePos > m_Recording.samples || !Microphone.IsRecording(m_MicrophoneID))
{
Log.Error("MicrophoneWidget", "Microphone disconnected.");
StopRecording();
yield break;
}
if ((bFirstBlock && writePos >= midPoint)
|| (!bFirstBlock && writePos < midPoint))
{
// front block is recorded, make a RecordClip and pass it onto our callback.
samples = new float[midPoint];
m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);
AudioData record = new AudioData();
record.MaxLevel = Mathf.Max(samples);
record.Clip = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
record.Clip.SetData(samples, 0);
m_SpeechToText.OnListen(record);
bFirstBlock = !bFirstBlock;
}
else
{
// calculate the number of samples remaining until we ready for a block of audio,
// and wait that amount of time it will take to record.
int remaining = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
float timeRemaining = (float)remaining / (float)m_RecordingHZ;
yield return new WaitForSeconds(timeRemaining);
}
}
yield break;
}
private void OnRecognize(SpeechRecognitionEvent result)
{
Debug.Log("OnRecognize");
if (result != null && result.results.Length > 0)
{
foreach (var res in result.results)
{
foreach (var alt in res.alternatives)
{
string text = alt.transcript;
Debug.Log(text);
Log.Debug("ExampleStreaming", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
}
}
}
}
}
And this is the line I added to get the microphone, in the RecordingHandler function. I just edited it to provide the microphone device at index zero, which was actually null before (I don't know why; is this intentionally left out, or is it an error?):
m_MicrophoneID = Microphone.devices[0];
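As an aside, Microphone.devices can be empty on machines without a microphone, so a slightly safer version (my own defensive variation) would be:

// Make sure a microphone exists before indexing into the device list
if (Microphone.devices.Length == 0)
{
    Log.Error("ExampleStreaming", "No microphone detected.");
    yield break;
}
m_MicrophoneID = Microphone.devices[0];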
But unfortunately it is not showing any output log in OnRecognize, which I think should execute.
Instead, after some seconds (as I gave the audio a length of 5), it displays these logs. What am I doing wrong? I am unable to understand why speech-to-text produces nothing.
[DEBUG] OnListenClosed(), State = DISCONNECTED
[DEBUG] KeepAlive exited.
I have also tried the IBM Watson Speech To Text scene; it is also not showing anything.
I am not able to stream real-time output yet, but I managed to convert an audio clip into text through the Watson service, and here is the simple code (which took three days):
using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
public class AudioClipToTextWatson : MonoBehaviour {
// Non-streaming
SpeechToText m_SpeechToText = new SpeechToText();
public AudioClip m_AudioClip = new AudioClip();
public bool on = false;
void Start () {
m_AudioClip = Microphone.Start(Microphone.devices[0], false, 4, 44100);
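// Caution: Microphone.Start returns immediately while the clip keeps
// filling for the next 4 seconds, so calling Recognize right away likely
// submits a mostly-empty clip; waiting for the clip length first (e.g.
// in a coroutine) seems safer.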
m_SpeechToText.Recognize(m_AudioClip, OnRecognize);
// Streaming
m_SpeechToText.StartListening(OnRecognize);
// Stop listening
m_SpeechToText.StopListening();
}
private void OnRecognize(SpeechRecognitionEvent result)
{
Debug.Log("result : " + result);
if (result != null && result.results.Length > 0)
{
foreach (var res in result.results)
{
foreach (var alt in res.alternatives)
{
string text = alt.transcript;
Debug.Log(text);
Debug.Log(res.final);
}
}
}
}
}
Note: You can record an audio clip using your microphone and convert it to text. If you already have an audio clip, drop it onto the field in the Inspector and comment out the first line in the Start method.
I solved the error.
I encountered the same issue with Unity 2018.3.14f1.
I just changed the player settings and then it worked fine:
File -> Build Settings -> Player Settings -> Other Settings
Configuration
Scripting Runtime Version: .NET 4.x Equivalent
API Compatibility Level: .NET 4.x

How to make an RFID reader seek for a card continuously

For a while now, I have been trying to make my RFID reader read (scan) a card automatically. The former code I had makes the reader scan a card when a button is pressed, but I want to scan cards automatically and continuously, as soon as any card is placed in the field. Here is what I tried, but I was not able to display the scanned UID in a textbox. Your help will be appreciated.
class CardReader : IDisposable
{
IntPtr _pSnr = Marshal.AllocHGlobal(1024);
private Thread _t;
private Action<string> _callback;
private volatile bool _stop;
public void ReadCard()
{
short icdev = 0x0000;
int status;
byte type = (byte)'A';//mifare one type is A
byte mode = 0x26; // Request the card which is not halted.
ushort TagType = 0;
byte bcnt = 0x04;//mifare hold on 4
IntPtr pSnr;
byte len = 255;
sbyte size = 0;
pSnr = Marshal.AllocHGlobal(1024);
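// NOTE: this buffer is allocated on every call and never freed; only the
// unused _pSnr field is released in Dispose(), so each ReadCard call
// leaks 1024 bytes.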
for (int i = 0; i < 2; i++)
{
status = rf_request(icdev, mode, ref TagType);//request card
if (status != 0)
continue;
status = rf_anticoll(icdev, bcnt, pSnr, ref len);//i did anticol--get the card sn
if (status != 0)
continue;
status = rf_select(icdev, pSnr, len, ref size);//select one card
if (status != 0)
continue;
byte[] szBytes = new byte[len];
for (int j = 0; j < len; j++)
{
szBytes[j] = Marshal.ReadByte(pSnr, j);
}
String m_cardNo = String.Empty;
for (int q = 0; q < len; q++)
{
m_cardNo += byteHEX(szBytes[q]);
}
_callback(m_cardNo);
// txtSearchPurse.Text = m_cardNo;
break;
}
}
public void Work()
{
while (!_stop)
{
ReadCard();
Thread.Sleep(1000);
}
}
public void Start(Action<string> cardRead)
{
if (_t != null)
return;
_stop = false;
_callback = cardRead;
_t = new Thread(Work);
_t.Start();
}
public void Stop()
{
if (_t != null)
{
_stop = true;
_t.Join();
_t = null;
}
}
public void Dispose()
{
Marshal.FreeHGlobal(_pSnr);
}
}
And I tried to use it like this:
private void btnRequest_Click(object sender, EventArgs e)
{
var reader = new CardReader();
reader.Start(CardReaded);
}
private void CardReaded(string cardnr){
//txtSearchPurse.Text = _callback;
}
"Here is what I tried, but I was not able to display the scanned UID in a textbox."
So your callback successfully fired? What error, if any, did you get? I'm guessing you got a "cross-thread exception". If so, fix it like this:
private void CardReaded(string cardnr)
{
this.Invoke((MethodInvoker)delegate
{
txtSearchPurse.Text = cardnr;
});
}
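In case the project is WPF rather than WinForms (the Control.Invoke call above assumes WinForms), the equivalent would be marshaling through the Dispatcher (the Invoke(Action) overload shown here requires .NET 4.5+):

private void CardReaded(string cardnr)
{
    // WPF: hop back onto the UI thread before touching the TextBox
    Dispatcher.Invoke(() => txtSearchPurse.Text = cardnr);
}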
