I'm trying to read the OverSized value from a struct and use it in an if statement, but the program doesn't seem to get any information from OverSized. By writing to the console I can see that overSized is true (for example), but heavy in the third code section doesn't have that value. Am I accessing OverSized the wrong way?
This is my struct for shipping box:
/// <summary>
/// Dimensions of a shipping box together with a flag marking it as oversized.
/// </summary>
public struct ShippingBox
{
    /// <summary>Box length.</summary>
    public int Length;

    /// <summary>Box width.</summary>
    public int Width;

    /// <summary>Box height.</summary>
    public int Height;

    /// <summary>True when the box was classified as oversized by the caller.</summary>
    public bool OverSized;

    /// <summary>Creates a box with all four fields set explicitly.</summary>
    /// <param name="length">Value stored in <see cref="Length"/>.</param>
    /// <param name="width">Value stored in <see cref="Width"/>.</param>
    /// <param name="height">Value stored in <see cref="Height"/>.</param>
    /// <param name="overSized">Value stored in <see cref="OverSized"/>.</param>
    public ShippingBox(int length, int width, int height, bool overSized)
    {
        Length = length;
        Width = width;
        Height = height;
        OverSized = overSized;
    }
}
And this is how I assign values to the struct (typeWiseBoxes is already defined):
// Decide whether the box is oversized ("some condition" is a placeholder for the real check).
bool overSized = false;
if (some condition)
{
    overSized = true;
}

// Oversized boxes get 100 for every dimension; all other boxes keep 0.
int l = overSized ? 100 : 0;
int w = overSized ? 100 : 0;
int h = overSized ? 100 : 0;

// Only record a box when at least one box of this type exists.
if (boxTypeWiseNumOfBoxes > 0)
{
    typeWiseBoxes.Add(new ShippingBox(l, w, h, overSized));
}
Now I am trying to read the value of OverSized with this:
// A freshly constructed struct: every field holds its default value.
ShippingBox specialBox = new ShippingBox();
var heavy = specialBox.OverSized; //also tried bool heavy

// Only the length depends on the flag; width and height are the same either way.
int tempLength = heavy ? 101 : 77;
int tempWidth = 8;
int tempHeight = 12;
I guess you have to change:
ShippingBox specialBox = new ShippingBox(); // creates a brand-new ShippingBox with every field at its default value
var heavy = specialBox.OverSized; // always false here: the default of a bool field, not the value stored in the list
...
to:
ShippingBox specialBox = typeWiseBoxes.Last(); // copies the last ShippingBox that was added to typeWiseBoxes
var heavy = specialBox.OverSized; // heavy now holds the OverSized field of that last element
...
This assigns specialBox the last element added to the list instead of a brand-new, default-initialized one.
Related
I want to implement the k-means clustering algorithm to cluster a list of text files loaded from disk, and also use the silhouette method to determine the number of clusters.
The error I'm getting is System.ArgumentOutOfRangeException: 'Schema mismatch for input column 'Features': expected scalar or vector of String, got VarVector<Single> (Parameter 'inputSchema')'
How can I solve the problem?
Code
// Entry point: loads text files, trains k-means for k = 2..10 and prints the k
// whose silhouette score is highest.
static void Main(string[] args)
{
// Load text files from disk
string[] filePaths = Directory.GetFiles("C:\\ExportedEmails\\", "*.txt");
string[] textFiles = filePaths.Select(File.ReadAllText).ToArray();
// Keep only the text that follows the "Description: " marker of each file.
// NOTE(review): when the marker is missing, IndexOf returns -1 and startIndex becomes
// the marker length minus one, so an arbitrary tail of the file is used instead.
var textData = textFiles.Select((text, index) =>
{
int startIndex = text.IndexOf("Description: ") + "Description: ".Length;
string description = text.Substring(startIndex);
return new TextData { Text = description, Index = index };
}).ToArray();
// Initialize the MLContext
var context = new MLContext();
// Convert data to IDataView
var data = context.Data.LoadFromEnumerable(textData);
// Initialize the variables to store the silhouette scores
// NOTE(review): the array has 10 slots but the loop below fills only indices 0..8 (k = 2..10);
// slot 9 keeps 0.0 and is mapped to k = 11 by the selection loop at the end.
var silhouetteScores = new double[10];
// Try every candidate cluster count k = 2..10
for (int k = 2; k <= 10; k++)
{
// Create a new KMeansTrainer
// NOTE(review): FeaturizeText takes the output column name first and the input column
// name second, so this call reads "Features" and writes "Text" - this is the cause of
// the schema-mismatch error discussed in the answer below.
var pipeline = context.Transforms.Text.FeaturizeText("Text", "Features")
.Append(context.Clustering.Trainers.KMeans(featureColumnName: "Features", numberOfClusters: k));
var model = pipeline.Fit(data);
var transformedData = model.Transform(data);
// Compute the silhouette score
// NOTE(review): confirm that the transformed data really exposes a "Centroid" column.
var clusterAssignments = transformedData.GetColumn<uint>("PredictedLabel").ToArray();
var clusterCenters = transformedData.GetColumn<float[]>("Centroid").ToArray();
var features = transformedData.GetColumn<float[]>("Features").ToArray();
var silhouetteScore = ComputeSilhouetteScore(features, clusterAssignments, clusterCenters);
// Slot k - 2 holds the score for k clusters.
silhouetteScores[k - 2] = silhouetteScore;
}
// Find the number of clusters that maximizes the average silhouette score
int optimalClusters = 0;
double maxScore = double.MinValue;
for (int i = 0; i < silhouetteScores.Length; i++)
{
if (silhouetteScores[i] > maxScore)
{
maxScore = silhouetteScores[i];
// Index i corresponds to k = i + 2 (see the training loop above).
optimalClusters = i + 2;
}
}
Console.WriteLine($"The optimal number of clusters is {optimalClusters}.");
}
/// <summary>
/// Averages the per-sample value (b - a) / max(a, b), where a comes from
/// ComputeAverageDistance and b from ComputeMinimumDistance.
/// </summary>
/// <param name="features">One feature vector per sample.</param>
/// <param name="clusterAssignments">Cluster label of each sample, parallel to <paramref name="features"/>.</param>
/// <param name="clusterCenters">Candidate cluster centers handed to ComputeMinimumDistance.</param>
/// <returns>The mean score, or 0 when there are no samples.</returns>
private static double ComputeSilhouetteScore(float[][] features, uint[] clusterAssignments, float[][] clusterCenters)
{
    // No samples: return 0 instead of the NaN that 0 / 0 would produce.
    if (features.Length == 0)
    {
        return 0;
    }

    double total = 0;
    for (int i = 0; i < features.Length; i++)
    {
        double a = ComputeAverageDistance(features[i], clusterAssignments[i], features, clusterAssignments);
        double b = ComputeMinimumDistance(features[i], clusterAssignments[i], clusterCenters);
        double denominator = Math.Max(a, b);

        // When a == b == 0 the ratio is 0 / 0 = NaN, which would poison the whole sum
        // (and NaN never compares greater than any score). Such a sample contributes 0.
        if (denominator > 0)
        {
            total += (b - a) / denominator;
        }
    }

    return total / features.Length;
}
/// <summary>
/// Mean distance from <paramref name="feature"/> to every vector in
/// <paramref name="features"/> whose label equals <paramref name="clusterAssignment"/>.
/// </summary>
/// <remarks>
/// NOTE(review): if <paramref name="feature"/> is itself one of <paramref name="features"/>,
/// its zero self-distance is part of the average. With no matching label the result is 0 / 0 (NaN).
/// </remarks>
private static double ComputeAverageDistance(float[] feature, uint clusterAssignment, float[][] features, uint[] clusterAssignments)
{
    double sum = 0;
    int members = 0;
    for (int idx = 0; idx < features.Length; idx++)
    {
        // Skip samples that belong to a different cluster.
        if (clusterAssignments[idx] != clusterAssignment)
        {
            continue;
        }

        sum += Distance(feature, features[idx]);
        members++;
    }

    return sum / members;
}
/// <summary>
/// Smallest distance from <paramref name="feature"/> to any center whose array index
/// differs from <paramref name="clusterAssignment"/>.
/// </summary>
/// <returns>
/// The minimum distance, or <see cref="double.MaxValue"/> when no center qualifies.
/// </returns>
/// <remarks>
/// NOTE(review): the label is compared directly with the zero-based array index; if the
/// labels are 1-based the excluded center is off by one - confirm against the caller.
/// </remarks>
private static double ComputeMinimumDistance(float[] feature, uint clusterAssignment, float[][] clusterCenters)
{
    double best = double.MaxValue;
    for (int center = 0; center < clusterCenters.Length; center++)
    {
        // The sample's own cluster is not a candidate.
        if (center == clusterAssignment)
        {
            continue;
        }

        double candidate = Distance(feature, clusterCenters[center]);
        best = Math.Min(best, candidate);
    }

    return best;
}
/// <summary>Euclidean distance between two vectors.</summary>
/// <param name="feature1">First vector; its length drives the iteration.</param>
/// <param name="feature2">Second vector; must have at least as many elements as <paramref name="feature1"/>.</param>
/// <returns>The square root of the summed squared component differences.</returns>
private static double Distance(float[] feature1, float[] feature2)
{
    double sumOfSquares = 0;
    int position = 0;
    foreach (float component in feature1)
    {
        sumOfSquares += Math.Pow(component - feature2[position], 2);
        position++;
    }

    return Math.Sqrt(sumOfSquares);
}
/// <summary>One input document for the clustering pipeline.</summary>
class TextData
{
    /// <summary>Document text used as the featurizer input.</summary>
    public string? Text { get; set; }

    /// <summary>
    /// Feature vector; left null when the object is created from raw text, so it is
    /// declared nullable like <see cref="Text"/>.
    /// </summary>
    public float[]? Features { get; set; }

    /// <summary>Position of the document in the loaded file list.</summary>
    public int Index { get; set; }
}
I believe the reason you're getting this error is that you set the output column name to Text and the input column name to Features in the FeaturizeText transform. I suspect you want the opposite: take the Text column as input and generate a feature vector called Features. You can do that by swapping the values in FeaturizeText, or by explicitly naming the parameters (FeaturizeText(outputColumnName: "Features", inputColumnName: "Text")).
Check out the FeaturizeText documentation for more details.
I use this code and need to return weights8spr, but I don't know how.
Can anybody help me? A plain return weights8spr; doesn't work, because I don't pass a double array in as input.
// Builds a normalized training matrix from station readings and trains a neural network.
// All work happens in the constructor, so the resulting weights8spr local is discarded
// when the constructor ends - a constructor cannot return a value.
public class Run {
public Run(List<dataVM2> TrainDataForStations)
{
// Per-column maximum and minimum used for normalization; the column order matches
// the arrays built below (TemperatureC1, SolarRadiation1, Wetness1, WindSpeed1, gen1).
double[] resultMAX1 = new double[] { 40.0, 1200.0, 100.0, 100.0, 10000.0 };
double[] resultMIN1 = new double[] { -50.0, 0.0, 0.0, 0.0, 0.0 };
// Target range of the normalization: [d1, d2].
double d1 = 0.0;
double d2 = 1.0;
int numItemsspr = TrainDataForStations.Count;
double[][] trainData = new double[numItemsspr][];
// Fixed seed, so every run uses the same random sequence.
Random rnd = new Random(1);
double[][] MassiveDataspr8 = new double[numItemsspr][];
// Copy the five fields of every reading into one row.
for (var i = 0; i < numItemsspr; ++i)
{
trainData[i] = new double[] { TrainDataForStations[i].TemperatureC1, TrainDataForStations[i].SolarRadiation1, TrainDataForStations[i].Wetness1, TrainDataForStations[i].WindSpeed1, TrainDataForStations[i].gen1 };
}
// Longest row length; every row built above has 5 elements.
int maxcol = 0;
for (int i = 0; i < trainData.Length; i++)
{
if (trainData[i].Length > maxcol)
maxcol = trainData[i].Length;
}
//data normalization
for (int j = 0; j < MassiveDataspr8.Length; j++)
{
MassiveDataspr8[j] = new double[maxcol];
for (int i = 0; i < maxcol; i++)
{
// Linear rescale of column i from [resultMIN1[i], resultMAX1[i]] to [d1, d2].
MassiveDataspr8[j][i] = (((trainData[j][i] - resultMIN1[i]) * (d2 - d1)) / (resultMAX1[i] - resultMIN1[i])) + d1;
}
}
int NumInput = 4;
int NumHidden = 25;
int NumOutput = 1;
// NOTE(review): rndSeed is never read; the Random above is seeded with the literal 1.
int rndSeed = 0;
NeuralNetworkData neuralform = new NeuralNetworkData(NumInput, NumHidden, NumOutput, rnd);
int maxEpochs = 1000;
double learnRate = 0.005;
double momentum = 0.001;
// Result of training; only a local variable, so callers of the constructor cannot reach it.
double[] weights8spr = new NeuralNetworkTrainer(neuralform, rnd).Train(MassiveDataspr8, maxEpochs, learnRate, momentum);
}
}
You are executing that code in the constructor of a class and you can't change the return type of a constructor.
You should create a method in the class and refactor the code a little.
// Skeleton showing the refactoring: the work moves from the constructor into a
// method, which (unlike a constructor) can declare and return a double[].
public class Run
{
// default constructor not needed
public Run()
{
}
// Receives the station data and returns the weights array.
// weights8spr is expected to be declared by the code that is moved in here.
public double[] RunMethod(List<dataVM2> TrainDataForStations)
{
// put your code here
// ...
// return the double[]
return weights8spr;
}
}
And execute like:
// Create the object first, then call the method to obtain the returned double[].
var run = new Run();
var weights = run.RunMethod(listOfTrainDataForStations);
A method must have a return type. If it doesn't return a value, then this would be the pseudo type void, otherwise a real one.
// Same code as the original constructor, moved into a method whose declared
// return type is double[] so the trained weights can be handed back to the caller.
public class StationsTrainer {
public double[] Run(List<dataVM2> TrainDataForStations)
{
// "..." stands for the unchanged preparation code from the question.
...
double[] weights8spr = new NeuralNetworkTrainer(neuralform, rnd)
.Train(MassiveDataspr8, maxEpochs, learnRate, momentum);
return weights8spr;
}
}
And choose a better name for the class. E.g. StationsTrainer.
I have a piece of code that reads an ISampleSource into a float[][], where the first array level is the channel and the second holds the sample data for that channel. I am going to take this data and apply signal processing to it; however, for debugging purposes I might want to manipulate the sample array and then play it back so that I can "hear" what the code is doing. Is there an easy way to take the data returned by ISampleSource.Read and put it back into a new ISampleSource, so that it can then be converted to an IWaveSource and played using WasapiOut?
Here is the class I have tried so far: you pass it the float[][] and the data needed to build a WaveFormat. But it doesn't actually do anything: it doesn't throw, it doesn't play, it just does nothing. What am I doing wrong?
// Attempt at an in-memory ISampleSource built from per-channel sample arrays.
// This is the non-working version from the question.
private class SampleSource : ISampleSource
{
public long Position { get; set; }
public WaveFormat WaveFormat { get; private set; }
public bool CanSeek => true;
public long Length => _data.Length;
// Flat sample buffer; stays null when the constructor returns early.
private float[] _data;
// NOTE(review): never read or written after initialization.
private long readPoint = 0;
public SampleSource(float[][] samples, int sampleRate, int bits, int channels)
{
WaveFormat = new WaveFormat(sampleRate, bits, channels);
if (samples.Length <= 0) return;
_data = new float[samples[0].Length * samples.Length];
int cchannels = samples.Length;
int sampleLength = samples[0].Length;
// NOTE(review): i stops at sampleLength, so only the first sampleLength slots of
// _data (which holds sampleLength * cchannels elements) are written.
for (var i = 0; i < sampleLength; i += cchannels)
for (var n = 0; n < cchannels; n++)
_data[i + n] = samples[n][i / cchannels];
}
public int Read(float[] buffer, int offset, int count)
{
// Clamp count to what is left after Position.
if (_data.Length < Position + count)
count = (int) (_data.Length - Position);
float[] outFloats = new float[count];
for (var i = 0; i < count; i++)
outFloats[i] = _data[i + Position + offset];
// Reassigning the parameter only changes this method's local reference;
// the caller's array is left untouched, so nothing is ever delivered.
buffer = outFloats;
Position += count;
return count;
}
public void Dispose() =>_data = null;
}
Rather than trying to set buffer to a new array (which makes no sense) I needed to directly write to the buffer array elements, so that they can be used outside of the function call. I don't really like doing it this way, maybe it's to fix an issue I don't see, but clearly that's how the library I'm using does it.
/// <summary>
/// In-memory <see cref="ISampleSource"/> that exposes per-channel sample arrays as a
/// single interleaved stream (frame 0 of every channel, then frame 1, ...).
/// </summary>
/// <remarks>
/// NOTE(review): the samples are floats; confirm that the WaveFormat built from
/// sampleRate/bits/channels describes a float format that the consumer accepts.
/// </remarks>
private class SampleSource : ISampleSource
{
    // Interleaved samples; set to null by Dispose.
    private float[] _data;

    /// <summary>Index of the next sample in the interleaved buffer.</summary>
    public long Position { get; set; }

    public WaveFormat WaveFormat { get; private set; }

    public bool CanSeek => true;

    /// <summary>Total number of interleaved samples (0 when empty or disposed).</summary>
    public long Length => _data?.Length ?? 0;

    /// <param name="samples">samples[channel][frame]; every channel must be at least as long as channel 0.</param>
    /// <param name="sampleRate">Passed through to <see cref="WaveFormat"/>.</param>
    /// <param name="bits">Passed through to <see cref="WaveFormat"/>.</param>
    /// <param name="channels">Passed through to <see cref="WaveFormat"/>.</param>
    public SampleSource(float[][] samples, int sampleRate, int bits, int channels)
    {
        WaveFormat = new WaveFormat(sampleRate, bits, channels);

        // Start from an empty buffer so Length and Read work even without any channel.
        _data = new float[0];
        if (samples.Length <= 0) return;

        int channelCount = samples.Length;
        int frameCount = samples[0].Length;
        _data = new float[frameCount * channelCount];

        // Interleave: every frame occupies channelCount consecutive slots.
        for (var frame = 0; frame < frameCount; frame++)
        {
            for (var channel = 0; channel < channelCount; channel++)
            {
                _data[frame * channelCount + channel] = samples[channel][frame];
            }
        }
    }

    /// <summary>
    /// Copies up to <paramref name="count"/> samples, starting at <see cref="Position"/>,
    /// into <paramref name="buffer"/> beginning at <paramref name="offset"/>.
    /// </summary>
    /// <returns>The number of samples copied; 0 once the end has been reached.</returns>
    public int Read(float[] buffer, int offset, int count)
    {
        if (_data == null)
        {
            throw new System.ObjectDisposedException(nameof(SampleSource));
        }

        // Never copy more than what is left, and never a negative amount.
        long remaining = _data.Length - Position;
        if (remaining <= 0 || count <= 0)
        {
            return 0;
        }

        if (count > remaining)
        {
            count = (int)remaining;
        }

        // offset addresses the destination buffer; the source is addressed by Position.
        System.Array.Copy(_data, Position, buffer, offset, count);
        Position += count;
        return count;
    }

    public void Dispose() => _data = null;
}
I am currently exploring neural networks and machine learning, and I implemented a basic neural network in C#. Now I want to test my backpropagation training algorithm with the MNIST database. However, I am having serious trouble reading the files correctly.
Spoiler: the code is currently very badly optimised for performance. My aim for now is to grasp the subject and get a structured view of how things work before I start replacing my data structures with faster ones.
To train the network I want to feed it a custom TrainingSet data structure:
// Serializable container for the network's training pairs.
[Serializable]
public class TrainingSet
{
// Maps an input vector (key) to its expected output vector (value).
// NOTE(review): List<double> does not override Equals/GetHashCode, so keys are
// compared by reference, not by content.
public Dictionary<List<double>, List<double>> data = new Dictionary<List<double>, List<double>>();
}
Keys will be my input data (784 pixels per entry (image), representing the greyscale values in the range 0 to 1). Values will be my output data (10 entries representing the digits 0-9, with all entries at 0 except the expected one at 1).
Now I want to read the MNIST database according to this contract. I am currently on my second attempt, which is inspired by this blog post: https://jamesmccaffrey.wordpress.com/2013/11/23/reading-the-mnist-data-set-with-c/ . Sadly, it still produces the same nonsense as my first attempt, scattering the pixels in a strange pattern:
My current reading algorithm:
// Reads an MNIST image file and its label file in lockstep and returns a TrainingSet
// that maps each image (pixel values scaled to 0..1) to a 10-element one-hot label.
// Every parsed image is also drawn into a debug window.
public static TrainingSet GenerateTrainingSet(FileInfo imagesFile, FileInfo labelsFile)
{
MnistImageView imageView = new MnistImageView();
imageView.Show();
TrainingSet trainingSet = new TrainingSet();
List<List<double>> labels = new List<List<double>>();
List<List<double>> images = new List<List<double>>();
using (BinaryReader brLabels = new BinaryReader(new FileStream(labelsFile.FullName, FileMode.Open)))
{
using (BinaryReader brImages = new BinaryReader(new FileStream(imagesFile.FullName, FileMode.Open)))
{
// Image file header: four big-endian 32-bit integers.
int magic1 = brImages.ReadBigInt32(); //Reading as BigEndian
int numImages = brImages.ReadBigInt32();
int numRows = brImages.ReadBigInt32();
int numCols = brImages.ReadBigInt32();
// Label file header: two big-endian 32-bit integers.
int magic2 = brLabels.ReadBigInt32();
int numLabels = brLabels.ReadBigInt32();
// Scratch buffer reused for every image (one byte per pixel).
byte[] pixels = new byte[numRows * numCols];
// each image
for (int imageCounter = 0; imageCounter < numImages; imageCounter++)
{
List<double> imageInput = new List<double>();
List<double> exspectedOutput = new List<double>();
for (int i = 0; i < 10; i++) //generate empty exspected output
exspectedOutput.Add(0);
//read image
for (int p = 0; p < pixels.Length; p++)
{
byte b = brImages.ReadByte();
pixels[p] = b;
imageInput.Add(b / 255.0f); //scale in 0 to 1 range
}
//read label
byte lbl = brLabels.ReadByte();
exspectedOutput[lbl] = 1; //modify exspected output
labels.Add(exspectedOutput);
images.Add(imageInput);
//Debug view showing parsed image.......................
Bitmap image = new Bitmap(numCols, numRows);
for (int y = 0; y < numRows; y++)
{
for (int x = 0; x < numCols; x++)
{
// NOTE(review): x * y is not a row-major offset - it is 0 for the whole first row and
// first column and many (x, y) pairs collide. The offset of pixel (x, y) in a row-major
// buffer is y * numCols + x; this explains the distorted debug image, while the
// imageInput list filled above is not affected.
image.SetPixel(x, y, Color.FromArgb(255 - pixels[x * y], 255 - pixels[x * y], 255 - pixels[x * y])); //invert colors to have 0,0,0 be white as specified by mnist
}
}
imageView.SetImage(image);
imageView.Refresh();
//.......................................................
}
// Explicit Close calls; the enclosing using blocks dispose both readers as well.
brImages.Close();
brLabels.Close();
}
}
// Pair every image with its label.
for (int i = 0; i < images.Count; i++)
{
trainingSet.data.Add(images[i], labels[i]);
}
return trainingSet;
}
All images produce a pattern as shown above. It's never the exact same pattern but always seems to have the pixels "pulled" down to the right corner.
That is how I did it:
/// <summary>
/// Lazily reads MNIST image/label file pairs (IDX format) and yields one
/// <see cref="Image"/> per entry.
/// </summary>
public static class MnistReader
{
    private const string TrainImages = "mnist/train-images.idx3-ubyte";
    private const string TrainLabels = "mnist/train-labels.idx1-ubyte";
    private const string TestImages = "mnist/t10k-images.idx3-ubyte";
    private const string TestLabels = "mnist/t10k-labels.idx1-ubyte";

    /// <summary>Streams the training set.</summary>
    public static IEnumerable<Image> ReadTrainingData()
    {
        return Read(TrainImages, TrainLabels);
    }

    /// <summary>Streams the test set.</summary>
    public static IEnumerable<Image> ReadTestData()
    {
        return Read(TestImages, TestLabels);
    }

    /// <summary>
    /// Reads both files in lockstep. Files are opened when enumeration starts and are
    /// closed when it finishes or the enumerator is disposed.
    /// </summary>
    /// <param name="imagesPath">Path of the image file.</param>
    /// <param name="labelsPath">Path of the label file.</param>
    /// <exception cref="EndOfStreamException">The image file ends in the middle of an image.</exception>
    private static IEnumerable<Image> Read(string imagesPath, string labelsPath)
    {
        // Read-only access is sufficient, and the using blocks guarantee the handles are released.
        using (var labels = new BinaryReader(File.OpenRead(labelsPath)))
        using (var images = new BinaryReader(File.OpenRead(imagesPath)))
        {
            // Image header: magic number, image count, row count, column count.
            images.ReadBigInt32(); // magic number (not validated)
            int numberOfImages = images.ReadBigInt32();
            int rows = images.ReadBigInt32();
            int columns = images.ReadBigInt32();

            // Label header: magic number, label count (both skipped).
            labels.ReadBigInt32();
            labels.ReadBigInt32();

            int pixelsPerImage = rows * columns;
            for (int i = 0; i < numberOfImages; i++)
            {
                byte[] bytes = images.ReadBytes(pixelsPerImage);
                if (bytes.Length < pixelsPerImage)
                {
                    throw new EndOfStreamException("Image file ended before image " + i + " was complete.");
                }

                // Pixels are stored row by row, so the stride of one row is the column count.
                var pixels = new byte[rows, columns];
                for (int r = 0; r < rows; r++)
                {
                    for (int c = 0; c < columns; c++)
                    {
                        pixels[r, c] = bytes[r * columns + c];
                    }
                }

                yield return new Image()
                {
                    Data = pixels,
                    Label = labels.ReadByte()
                };
            }
        }
    }
}
Image class:
// One image entry as produced by MnistReader.Read.
public class Image
{
// Label byte read from the label file for this image.
public byte Label { get; set; }
// Pixel bytes of the image as a two-dimensional array.
public byte[,] Data { get; set; }
}
Some extension methods:
/// <summary>Helper extensions for reading big-endian integers and walking 2D arrays.</summary>
public static class Extensions
{
    /// <summary>Reads a 32-bit signed integer stored in big-endian byte order.</summary>
    /// <param name="br">Reader positioned at the first of the four bytes.</param>
    /// <returns>The decoded integer.</returns>
    /// <exception cref="EndOfStreamException">Fewer than four bytes are left in the stream.</exception>
    public static int ReadBigInt32(this BinaryReader br)
    {
        var bytes = br.ReadBytes(sizeof(Int32));

        // ReadBytes returns a shorter array at end of stream; report that explicitly
        // instead of letting BitConverter fail with an unrelated argument error.
        if (bytes.Length < sizeof(Int32))
        {
            throw new EndOfStreamException("Expected 4 bytes for a big-endian Int32 but the stream ended.");
        }

        // BitConverter uses the machine's byte order, so flip on little-endian hosts.
        if (BitConverter.IsLittleEndian) Array.Reverse(bytes);
        return BitConverter.ToInt32(bytes, 0);
    }

    /// <summary>
    /// Invokes <paramref name="action"/> once per cell, passing the first-dimension index
    /// and then the second-dimension index; the second index varies fastest.
    /// </summary>
    public static void ForEach<T>(this T[,] source, Action<int, int> action)
    {
        int firstLength = source.GetLength(0);
        int secondLength = source.GetLength(1);
        for (int first = 0; first < firstLength; first++)
        {
            for (int second = 0; second < secondLength; second++)
            {
                action(first, second);
            }
        }
    }
}
Usage:
// Images are produced one at a time while the loop runs (the reader uses yield return).
foreach (var image in MnistReader.ReadTrainingData())
{
//use image here
}
or
// Same pattern for the test set files.
foreach (var image in MnistReader.ReadTestData())
{
//use image here
}
Why not use a NuGet package?
MNIST.IO: just a data reader (disclaimer: my package).
Accord.DataSets: contains classes to download and parse machine learning datasets such as MNIST, News20 and Iris. This package is part of the Accord.NET Framework.
I have a function which is applied on each element of a 2D array (double[,]), but only along a given dimension.
I had to create two functions because I don't know how to pass the desired dimension to the method as a parameter. I ended up with a "vertical_foo" and a "horizontal_foo" functions, which are almost identical to each other:
/// <summary>
/// Half-difference of the neighbours along the first dimension:
/// result[i, j] = (a[i + 1, j] - a[i - 1, j]) * 0.5, with the neighbour indices
/// clamped to the array bounds at the edges.
/// </summary>
/// <param name="a">Input array.</param>
/// <returns>A new array with the same dimensions as <paramref name="a"/>.</returns>
private double[,] vertical_foo (double[,] a) {
    int height = a.GetLength(0);
    int width = a.GetLength(1);
    var result = new double[height, width];
    for (int i = 0; i < height; i++) {
        // The neighbours depend only on the row, so compute them once per row.
        int before = Math.Max(i - 1, 0);
        int after = Math.Min(i + 1, height - 1);
        for (int j = 0; j < width; j++) {
            result[i, j] = (a[after, j] - a[before, j]) * 0.5;
        }
    }
    return result;
}
/// <summary>
/// Half-difference of the neighbours along the second dimension:
/// result[i, j] = (a[i, j + 1] - a[i, j - 1]) * 0.5, with the neighbour indices
/// clamped to the array bounds at the edges.
/// </summary>
/// <param name="a">Input array.</param>
/// <returns>A new array with the same dimensions as <paramref name="a"/>.</returns>
private double[,] horizontal_foo (double[,] a) {
    int height = a.GetLength(0);
    int width = a.GetLength(1);
    var result = new double[height, width];
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            int before = Math.Max(j - 1, 0);
            // Column indices are limited by the width, not the height.
            int after = Math.Min(j + 1, width - 1);
            result[i, j] = (a[i, after] - a[i, before]) * 0.5;
        }
    }
    return result;
}
I would like to have a signature like this, where the second parameter is the dimension on which I want to apply the indexing:
// Desired signature only (the body is left empty in the question): dimension selects
// the array dimension along which the neighbour indexing is applied.
private double[,] general_foo (double[,] a, int dimension) {}
Any suggestion is much welcome!
I'll take a stab at this:
/// <summary>
/// Half-difference of the two neighbours along the chosen dimension, with the
/// neighbour indices clamped at the array edges.
/// </summary>
/// <param name="a">Input array.</param>
/// <param name="dimension">0 to step along the first index, 1 to step along the second.</param>
/// <returns>A new array with the same dimensions as <paramref name="a"/>.</returns>
private double[,] general_foo(double[,] a, int dimension)
{
    var output = new double[a.GetLength(0), a.GetLength(1)];

    // NOTE only works for 2D arrays: the "other" dimension is simply the remaining one.
    int crossLength = a.GetLength(1 - dimension);
    int alongLength = a.GetLength(dimension);
    bool alongFirstIndex = dimension == 0;

    for (int along = 0; along < alongLength; along++)
    {
        // Clamp the neighbours so the edges reuse the edge element itself.
        int prev = Math.Max(along - 1, 0);
        int next = Math.Min(along + 1, alongLength - 1);

        for (int cross = 0; cross < crossLength; cross++)
        {
            if (alongFirstIndex)
            {
                output[along, cross] = (a[next, cross] - a[prev, cross]) * 0.5;
            }
            else
            {
                output[cross, along] = (a[cross, next] - a[cross, prev]) * 0.5;
            }
        }
    }

    return output;
}
Here's a more generic method. It uses a few lambdas, so it might also help you understand how lambdas are used.
// Builds a new array of the same shape as a multidimensional array by passing every
// item, together with its index, to a selector.
//   a        - source array of any rank whose elements are of type T
//   selector - receives (item, index) and returns the value stored at that index.
//              The same index array instance is reused for every call, so clone it
//              if it has to be kept or modified.
// Returns the new array; cast it to the concrete array type (e.g. double[,]).
private Array MutateArray<T>(Array a, Func<T, int[], T> selector)
{
    var rank = a.Rank;
    var lengths = Enumerable.Range(0, rank)
        .Select(r => a.GetLength(r))
        .ToArray(); // Get length of a in each dimension
    var result = Array.CreateInstance(typeof(T), lengths);
    var index = new int[rank];
    foreach (T item in a) // flattens array, last dimension varies fastest
    {
        result.SetValue(selector(item, index), index);

        // Advance the index in the same order as the enumeration above: bump the
        // last dimension first and carry into earlier dimensions on overflow.
        for (var d = rank - 1; d >= 0; d--)
        {
            if (index[d] == lengths[d] - 1)
            {
                index[d] = 0;
            }
            else
            {
                index[d]++;
                break;
            }
        }
    }
    return result;
}
// The "foo" operation expressed through MutateArray: every cell becomes half the
// difference of its two neighbours along dimension d, clamped at the array edges.
private double[,] generic_foo(double[,] a, int d)
{
    var lastIndex = a.GetUpperBound(d);

    // The item value itself is not needed; only the position is used.
    Func<double, int[], double> halfDifference = (unused, position) =>
    {
        // Work on copies so the index array owned by MutateArray is not modified.
        var before = (int[])position.Clone();
        var after = (int[])position.Clone();
        before[d] = Math.Max(before[d] - 1, 0);
        after[d] = Math.Min(after[d] + 1, lastIndex);

        var lower = (double)a.GetValue(before);
        var upper = (double)a.GetValue(after);
        return (upper - lower) * 0.5;
    };

    return (double[,])MutateArray<double>(a, halfDifference);
}
Would it be acceptable to do something along these lines?
// Start from the current cell; only the index of the chosen dimension is shifted.
// Assumes height = a.GetLength(0) and width = a.GetLength(1), as in the question's functions.
int before_i = i, after_i = i;
int before_j = j, after_j = j;
switch (dimension)
{
    case 0:
        // First index: valid range is [0, height - 1].
        before_i = Math.Max(i - 1, 0);
        after_i = Math.Min(i + 1, height - 1);
        break;
    case 1:
        // Second index: valid range is [0, width - 1].
        before_j = Math.Max(j - 1, 0);
        after_j = Math.Min(j + 1, width - 1);
        break;
}
result[i, j] = (a[after_i, after_j] - a[before_i, before_j]) * 0.5;
It's not terribly pretty, but at least this way you don't need two functions.
You could pass in a delegate to extract the dimension you're interested in? (or a lambda)
Func<int[,],int,int[]> accessor here indicates the signature of a function (the last type parameter is the return type).
// Runs DoSomething with GetRow as the accessor and 1 as the index.
private void Working() => DoSomething(GetRow, 1);
So, in this example, you want the "DoSomething" worker to work on a row.
// Builds a fixed 2 x 5 sample grid and lets the supplied accessor extract the part
// selected by Idx; the extracted array is not used any further here.
private void DoSomething(Func<int[,],int,int[]> accessor, int Idx)
{
    var sampleGrid = new int[,]
    {
        { 1, 1, 1, 1, 1 },
        { 2, 2, 2, 2, 2 }
    };
    int[] extracted = accessor(sampleGrid, Idx);
}
/// <summary>Copies one row of a two-dimensional array into a new one-dimensional array.</summary>
/// <param name="data">Source array.</param>
/// <param name="index">First-dimension index of the row to copy.</param>
/// <returns>The elements data[index, 0], data[index, 1], ... in order.</returns>
public int[] GetRow(int[,] data,int index)
{
    int columnCount = data.GetLength(1);
    var row = new int[columnCount];
    for (int column = 0; column < columnCount; column++)
    {
        row[column] = data[index, column];
    }
    return row;
}
In the above example, you get a one-dimensional array containing 2, 2, 2, 2, 2.
I'm addressing the general case of extracting a particular part of a multidimensional array here... The method/ lambda you pass in extracts the meaningful part of data...