Proper way to measure memory allocation with Tasks - C#

I need to reduce the number of tasks created/executed in a small piece of code, because there is a potentially large memory allocation.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace TaskMemoryUsage
{
class Program
{
static void Main(string[] args)
{
int availableTaskCount = Environment.ProcessorCount;
OriginalCalculationMethod();
FirstCalculationMethod(availableTaskCount);
SecondCalculationMethod(availableTaskCount);
Console.ReadKey();
}
static TaskResult Calculate(int i)
{
//a lot of complicated calculations are made here
return new TaskResult();
}
static List<TaskResult> OriginalCalculationMethod()
{
List<Task<TaskResult>> workersArray = new List<Task<TaskResult>>();
long memStart = GC.GetTotalMemory(true);
for (int i = 0; i < 10000; ++i)
{
workersArray.Add(
Task.Factory.StartNew(
new Func<TaskResult>(() => Calculate(i))
));
}
Task.WaitAll(workersArray.ToArray());
long memEnd = GC.GetTotalMemory(true);
Console.WriteLine($"DIFFERENCE = {memEnd - memStart}");
return workersArray.Select(s => s.Result).ToList();
}
static List<TaskResult> FirstCalculationMethod(int availableTaskCount)
{
long memStart = GC.GetTotalMemory(true);
using (SemaphoreSlim semaphore = new SemaphoreSlim(availableTaskCount))
{
List<Task<TaskResult>> tasks = new List<Task<TaskResult>>();
for (int i = 0; i < 10000; ++i)
{
Task<TaskResult> task = Task.Factory.StartNew(() =>
{
try
{
semaphore.Wait();
return Calculate(i);
}
finally
{
semaphore.Release();
}
});
tasks.Add(task);
}
Task.WaitAll(tasks.ToArray());
long memEnd = GC.GetTotalMemory(true);
Console.WriteLine($"DIFFERENCE = {memEnd - memStart}");
return tasks.Select(s => s.Result).ToList();
}
}
static List<TaskResult> SecondCalculationMethod(int availableTaskCount)
{
List<Task<TaskResult>> workersArray = new List<Task<TaskResult>>();
List<TaskResult> tasksResults = new List<TaskResult>();
long memStart = GC.GetTotalMemory(true);
for (int i = 0; i < 10000; ++i)
{
workersArray.Add(Task.Factory.StartNew(new Func<TaskResult>(() => Calculate(i))
));
if (workersArray.Count >= availableTaskCount)
{
Task.WaitAll(workersArray.ToArray());
tasksResults.AddRange(workersArray.Select(t => t.Result));
workersArray.Clear();
}
}
Task.WaitAll(workersArray.ToArray());
long memEnd = GC.GetTotalMemory(true);
Console.WriteLine($"DIFFERENCE = {memEnd - memStart}");
return tasksResults;
}
}
class TaskResult
{
public double[] calculationResult;
}
}
I got the best results with the last solution, but I am not sure whether I am measuring it properly, so I want to ask whether I am measuring the memory allocation correctly for this issue, whether my solutions are correct, and whether there are better ways to reduce the amount of memory that is allocated at once.
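One thing worth noting about the measurement itself: GC.GetTotalMemory(true) reports the size of the live managed heap after a collection, so Task objects that have already become garbage by the time you measure will not show up in the difference. If you can target .NET Core 3.0 or later, GC.GetTotalAllocatedBytes counts every byte the process has allocated, collected or not. Below is a minimal sketch of measuring both numbers around a batch of tasks; the loop body is a placeholder, not your Calculate method:
using System;
using System.Threading.Tasks;

class AllocationMeasurementSketch
{
    static void Main()
    {
        // Counts every byte allocated by the process, including objects that
        // have already been collected (.NET Core 3.0+ / .NET 5+).
        long allocatedBefore = GC.GetTotalAllocatedBytes(precise: true);
        long liveBefore = GC.GetTotalMemory(forceFullCollection: true);

        var tasks = new Task<int>[10000];
        for (int i = 0; i < tasks.Length; i++)
        {
            int copy = i;                        // copy to avoid capturing the loop variable
            tasks[i] = Task.Run(() => copy * 2); // placeholder for the real Calculate(i)
        }
        Task.WaitAll(tasks);

        long allocatedAfter = GC.GetTotalAllocatedBytes(precise: true);
        long liveAfter = GC.GetTotalMemory(forceFullCollection: true);

        Console.WriteLine($"Total allocated  = {allocatedAfter - allocatedBefore}");
        Console.WriteLine($"Live heap growth = {liveAfter - liveBefore}");
    }
}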

Related

Task process ending before finishing all the work

I've been having trouble running multiple tasks with heavy operations.
It seems as if the tasks are killed before all the operations are complete.
The code here is example code I used to replicate the issue. If I add something like Debug.Write(), the added wait for writing fixes the issue. The issue also goes away if I test on a smaller sample size. The reason there is a class in the example below is to create complexity for the test.
The real case where I first encountered the issue is too complicated to explain in a post here.
public static class StaticRandom
{
static int seed = Environment.TickCount;
static readonly ThreadLocal<Random> random =
new ThreadLocal<Random>(() => new Random(Interlocked.Increment(ref seed)));
public static int Next()
{
return random.Value.Next();
}
public static int Next(int maxValue)
{
return random.Value.Next(maxValue);
}
public static double NextDouble()
{
return random.Value.NextDouble();
}
}
// this is the test function I run to recreate the problem:
static void tasktest()
{
var testlist = new List<ExampleClass>();
for (var index = 0; index < 10000; ++index)
{
var newClass = new ExampleClass();
newClass.Populate(Enumerable.Range(0, 1000).ToList());
testlist.Add(newClass);
}
var anotherClassList = new List<ExampleClass>();
var threadNumber = 5;
if (threadNumber > testlist.Count)
{
threadNumber = testlist.Count;
}
var taskList = new List<Task>();
var tokenSource = new CancellationTokenSource();
CancellationToken cancellationToken = tokenSource.Token;
int stuffPerThread = testlist.Count / threadNumber;
var stuffCounter = 0;
for (var count = 1; count <= threadNumber; ++count)
{
var toSkip = stuffCounter;
var threadWorkLoad = stuffPerThread;
var currentIndex = count;
// these ifs make sure all the indexes are covered
if (stuffCounter + threadWorkLoad > testlist.Count)
{
threadWorkLoad = testlist.Count - stuffCounter;
}
else if (count == threadNumber && stuffCounter + threadWorkLoad < testlist.Count)
{
threadWorkLoad = testlist.Count - stuffCounter;
}
taskList.Add(Task.Factory.StartNew(() => taskfunc(testlist, anotherClassList, toSkip, threadWorkLoad),
cancellationToken, TaskCreationOptions.None, TaskScheduler.Default));
stuffCounter += stuffPerThread;
}
Task.WaitAll(taskList.ToArray());
}
public class ExampleClass
{
public ExampleClassInner[] Inners { get; set; }
public ExampleClass()
{
Inners = new ExampleClassInner[5];
for (var index = 0; index < Inners.Length; ++index)
{
Inners[index] = new ExampleClassInner();
}
}
public void Populate(List<int> intlist) {/*adds random ints to the inner class*/}
public ExampleClass(ExampleClass copyFrom)
{
Inners = new ExampleClassInner[5];
for (var index = 0; index < Inners.Length; ++index)
{
Inners[index] = new ExampleClassInner(copyFrom.Inners[index]);
}
}
public class ExampleClassInner
{
public bool SomeBool { get; set; } = false;
public int SomeInt { get; set; } = -1;
public ExampleClassInner()
{
}
public ExampleClassInner(ExampleClassInner copyFrom)
{
SomeBool = copyFrom.SomeBool;
SomeInt = copyFrom.SomeInt;
}
}
}
static int expensivefunc(int theint)
{
/*a lot of pointless arithmetic and loops done only on primitives and with primitives,
just to increase the complexity*/
theint *= theint + 1;
var anotherlist = Enumerable.Range(0, 10000).ToList();
for (var index = 0; index < anotherlist.Count; ++index)
{
theint += index;
if (theint % 5 == 0)
{
theint *= index / 2;
}
}
var yetanotherlist = Enumerable.Range(0, 50000).ToList();
for (var index = 0; index < yetanotherlist.Count; ++index)
{
theint += index;
if (theint % 7 == 0)
{
theint -= index / 3;
}
}
while (theint > 8)
{
theint /= 2;
}
return theint;
}
// this function is intentionally creating a lot of objects, to simulate complexity
static void taskfunc(List<ExampleClass> intlist, List<ExampleClass> anotherClassList, int skip, int take)
{
if (take == 0)
{
take = intlist.Count;
}
var partial = intlist.Skip(skip).Take(take).ToList();
for (var index = 0; index < partial.Count; ++index)
{
var testint = expensivefunc(index);
var newClass = new ExampleClass(partial[index]);
newClass.Inners[StaticRandom.Next(5)].SomeInt = testint;
anotherClassList.Add(new ExampleClass(newClass));
}
}
The expected result is that anotherClassList will be the same size as testlist, and that is what happens when the lists are smaller or the task operations are less complex. However, when I increase the volume of operations, anotherClassList has a few indexes missing, and sometimes some of the entries in the list are null.
Why does this happen, when I have Task.WaitAll?
Your problem is that it's just not thread-safe; you just can't add to a List<T> from multiple threads and expect it to play nice.
One way is to use a lock or a thread-safe collection, but I feel this should all be refactored (my OCD is going off all over the place).
private static object _sync = new object();
...
private static void TaskFunc(List<ExampleClass> intlist, List<ExampleClass> anotherClassList, int skip, int take)
{
...
var partial = intlist.Skip(skip).Take(take).ToList();
...
// note that locking here will likely drastically decrease any performance threading gain
lock (_sync)
{
for (var index = 0; index < partial.Count; ++index)
{
// this is your problem, you are adding to a list from multiple threads
anotherClassList.Add(...);
}
}
}
In short, I think you need to think more carefully about the threading logic of your method, identify what you are trying to achieve, and work out how to make it conceptually thread-safe (while keeping your performance gains).
After TheGeneral pointed out that List<T> is not thread-safe, I changed the List I was adding to from the threads to an array, and this fixed my issue.
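For completeness, here is a minimal sketch of the two lock-free shapes mentioned above: a thread-safe collection, and a pre-sized array where each task writes only to its own slot. The names and the Parallel.For bodies are placeholders, not the original code:
using System;
using System.Collections.Concurrent;
using System.Threading.Tasks;

class ResultCollectionSketch
{
    static void Main()
    {
        const int Count = 10000;

        // Option 1: a thread-safe collection; items arrive in no particular order.
        var bag = new ConcurrentBag<int>();
        Parallel.For(0, Count, i => bag.Add(i * 2));

        // Option 2: a pre-sized array; each index is written by exactly one
        // iteration, so no two threads ever touch the same slot and no lock is needed.
        var results = new int[Count];
        Parallel.For(0, Count, i => results[i] = i * 2);

        Console.WriteLine($"{bag.Count} items in the bag, {results.Length} slots in the array");
    }
}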

Comparison time- Single Threads vs Multiple Threads differing in results

I wrote this code to test multi-threading and single-threading speeds. Thanks for all the feedback! I rewrote most of it based on the great comments I received. It now functions properly (maybe with a bug here or there), tests multiple threads first, and takes an average to get a more accurate speed (scroll to the bottom for the continuation):
Main method Class
using System;
namespace SingleAndMultiThreading
{
internal class Threads
{
private static void Main(string[] args)
{
long numOfObjCreated;
int numberOfTests;
while (true)
{
try
{
Console.Write("Number of objects to create: ");
numOfObjCreated = Convert.ToInt64(Console.ReadLine());
break;
}
catch (Exception)
{
Console.WriteLine("Invalid input.");
}
}
while (true)
{
try
{
Console.Write("Number of tests to run: ");
numberOfTests = Convert.ToInt32(Console.ReadLine());
break;
}
catch (Exception)
{
Console.WriteLine("Invalid input.");
}
}
CalculateResults(numOfObjCreated, numberOfTests);
Console.ReadKey();
}
private static void CalculateResults(long numOfObjCreated, int numberOfTests)
{
double totalPercentages = 0;
for (var i = 0; i < numberOfTests; i++)
{
totalPercentages += CompleteTests(numOfObjCreated);
}
var accuracy = totalPercentages / numberOfTests;
if ((int)accuracy == 0)
{
Console.WriteLine("\nIn this case, neither single threading or multithreading is faster.\n" +
"They both run equally well under these conditions.\n");
return;
}
if (accuracy < 0)
{
Console.WriteLine("\nIn this case with {0} objects being created, single threading is faster!\n",
string.Format("{0:#,###0}", numOfObjCreated));
return;
}
Console.WriteLine("\nFrom {0} test(s), {1}% was the average percentage of increased speed in multithreading.\n",
string.Format("{0:#,###0}", numberOfTests), string.Format("{0:#,###0}", accuracy));
}
private static double CompleteTests(long numOfObjCreated)
{
Console.WriteLine("Computing...");
var numOfCores = Environment.ProcessorCount;
var timeForMultiThread = MultiThread.Run(numOfObjCreated, numOfCores);
var timeForSingleThread = SingleThread.Run(numOfObjCreated);
var percentFaster = ((timeForSingleThread / timeForMultiThread) * 100) - 100;
//note: .NET does its part in assigning a certain thread to its own core
Console.WriteLine("Using all {0} cores, creating {1} objects is {2}% faster.",
numOfCores, string.Format("{0:#,###0}", numOfObjCreated), string.Format("{0:#,###0}", percentFaster));
return percentFaster;
}
}
}
Single Threading Class
using System;
using System.Diagnostics;
namespace SingleAndMultiThreading
{
internal class SingleThread
{
public static double Run(long numOfObjCreated)
{
var watch = new Stopwatch();
watch.Start();
for (long i = 0; i < numOfObjCreated; i++)
{
new object();
}
watch.Stop();
var totalTime = watch.ElapsedTicks;
Console.WriteLine("The time to create {0} objects with 1 thread is: {1} ticks.",
string.Format("{0:#,###0}", numOfObjCreated), string.Format("{0:#,###0}", totalTime));
return totalTime;
}
}
}
Multi Threading Class
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading;
namespace SingleAndMultiThreading
{
internal class MultiThread
{
public static double Run(long numOfObjCreated, int numOfCores)
{
var watch = new Stopwatch();
var workerObject = new Worker(numOfObjCreated / numOfCores);
var listOfThreads = new List<Thread>();
for (long k = 0; k < numOfCores; k++)
{
var workerThread = new Thread(workerObject.DoWork);
listOfThreads.Add(workerThread);
}
watch.Start();
foreach (var thread in listOfThreads)
{
thread.Start();
}
byte countOfCompletedThreads = 0;
while (true)
{
foreach (var thread in listOfThreads)
if (!thread.IsAlive)
countOfCompletedThreads++;
if (countOfCompletedThreads == numOfCores)
break;
countOfCompletedThreads = 0;
}
watch.Stop();
var totalTime = watch.ElapsedTicks;
Console.WriteLine("The time to create {0} objects utilizing all {1} cores is: {2} ticks.",
string.Format("{0:#,###0}", numOfObjCreated), numOfCores, string.Format("{0:#,###0}", totalTime));
return totalTime;
}
}
}
Worker Class
namespace SingleAndMultiThreading
{
public class Worker
{
private readonly long _numOfObjToCreate;
public bool IsDone;
public Worker(long numOfObjToCreate)
{
_numOfObjToCreate = numOfObjToCreate;
}
public void DoWork()
{
for (long i = 0; i < _numOfObjToCreate; i++)
{
new object();
}
IsDone = true;
}
}
}
The output of this code is a bit too long to post (I urge you to copy and paste it into your own IDE; it's really fascinating). I guess that, as the accepted answer says, the reason this doesn't give the same result on every test is CPU scheduling and other minor issues such as ASLR. More than one thing is happening aside from Visual Studio running this program, and those things are prioritized differently. Also, thank you for pointing out that running the multi-threaded test first helps, because the memory allocation has already been done!
Another thing to point out, which I found while running it: the spikes are when the multi-threaded processing takes place.
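One low-tech way to reduce that order dependence is an untimed warm-up pass before measuring, so JIT compilation and the first allocations are not charged to whichever variant happens to run first. A rough sketch of the pattern (not the benchmark code above):
using System;
using System.Diagnostics;

class WarmupSketch
{
    static long TimeTicks(Action work)
    {
        work();                                // untimed warm-up: JIT, first allocations
        var watch = Stopwatch.StartNew();      // only the second run is measured
        work();
        watch.Stop();
        return watch.ElapsedTicks;
    }

    static void Main()
    {
        long ticks = TimeTicks(() =>
        {
            for (long i = 0; i < 1000000; i++)
            {
                new object();
            }
        });
        Console.WriteLine($"Second run took {ticks} ticks");
    }
}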

Task.Factory.StartNew is not working in a C# console application

I have a console application which reads messages from Console.OpenStandardInput();
I am doing this in a task, but it does not seem to be working.
static void Main(string[] args)
{
wtoken = new CancellationTokenSource();
readInputStream = Task.Factory.StartNew(() =>
{
wtoken.Token.ThrowIfCancellationRequested();
while (true)
{
if (wtoken.Token.IsCancellationRequested)
{
wtoken.Token.ThrowIfCancellationRequested();
}
else
{
OpenStandardStreamIn();
}
}
}, wtoken.Token
);
Console.ReadLine();
}
Here is my OpenStandardStreamIn function
public static void OpenStandardStreamIn()
{
Stream stdin = Console.OpenStandardInput();
int length = 0;
byte[] bytes = new byte[4];
stdin.Read(bytes, 0, 4);
length = System.BitConverter.ToInt32(bytes, 0);
string input = "";
for (int i = 0; i < length; i++)
{
input += (char)stdin.ReadByte();
}
Console.Write(input);
}
Any help? Why is it not working in a continuous loop?
You basically have a race condition between Console.ReadLine and your task. Both of them are trying to read from standard input - and I certainly don't know what you should expect when reading from standard input from two threads at the same time, but it seems like something worth avoiding.
You can easily test this by changing the task to do something other than reading from standard input. For example:
using System;
using System.Threading;
using System.Threading.Tasks;
class Test
{
static void Main()
{
var wtoken = new CancellationTokenSource();
var readInputStream = Task.Factory.StartNew(() =>
{
for (int i = 0; i < 10; i++)
{
Console.WriteLine(i);
Thread.Sleep(200);
}
}, wtoken.Token);
Console.ReadLine();
}
}
If your real code needs to read from standard input, then I suggest you change Console.ReadLine() into readInputStream.Wait(). I'd also suggest you use Task.Run instead of Task.Factory.StartNew() if you're using .NET 4.5, just for readability - assuming you don't need any of the more esoteric behaviour of TaskFactory.StartNew.
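Putting those suggestions together, a minimal sketch of the reshaped program might look like this; the body of OpenStandardStreamIn is a placeholder for the length-prefixed read in the question:
using System;
using System.Threading;
using System.Threading.Tasks;

class StdInReaderSketch
{
    static void Main()
    {
        var wtoken = new CancellationTokenSource();

        var readInputStream = Task.Run(() =>
        {
            while (!wtoken.Token.IsCancellationRequested)
            {
                OpenStandardStreamIn();   // now the only code reading standard input
            }
        }, wtoken.Token);

        // Wait on the task itself instead of calling Console.ReadLine(), so Main
        // and the task never compete for standard input.
        readInputStream.Wait();
    }

    static void OpenStandardStreamIn()
    {
        // placeholder: read a single byte and echo it back
        var stdin = Console.OpenStandardInput();
        int b = stdin.ReadByte();
        if (b >= 0)
        {
            Console.Write((char)b);
        }
    }
}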

Tasks in array -- only last one runs

I was experimenting with tasks. Why does this output 10 and not each value of the loop?
public static void StartTasks()
{
Task[] tasks = new Task[10];
for (int i = 0; i < 10; i++)
tasks[i] = new Task(() => Console.WriteLine(i));
foreach (Task task in tasks)
{
task.Start();
}
}
C# lambdas capture the variable itself, not the value of the variable at the moment the lambda is created.
If you want to capture the value, you need to make a copy of it inside the loop first, so that the closure captures a locally scoped variable that never changes.
public static void StartTasks()
{
Task[] tasks = new Task[10];
for (int i = 0; i < 10; i++) {
int j = i;
tasks[i] = new Task(() => Console.WriteLine(j));
}
foreach (Task task in tasks)
{
task.Start();
}
}
In addition to the accepted answer, you can also pass a parameter to the task. For example,
using System;
using System.Threading.Tasks;
static void StartTasks(int instances)
{
var tasks = new Task[instances];
for (int i = 0; i < instances; i++)
{
tasks[i] = new Task((object param) =>
{
var t = (int)param;
Console.Write("({0})", t);
}, i);
}
Parallel.ForEach<Task>(tasks, (t) => { t.Start(); });
Task.WaitAll(tasks);
}
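Calling StartTasks(10), for example, prints (0) through (9), though not necessarily in that order, because the tasks are started in parallel and each one receives its own index through the state parameter.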

Multithreading in C# queries

I am new to multithreading in C#. I have a 3D array of size (x)(y)(z), and I want to calculate the average of all the z samples for every (x, y) value. I want to do that using multithreading (say 2 threads), where I send one half of the array, of size (x/2)*y*z, to thread1 for processing and the other half to thread2.
How do I do that? How do I pass arguments to and retrieve results from the individual threads? A code example would be helpful.
Regards
I would recommend using PLINQ for this instead of threading this yourself.
It will let you run your query using LINQ syntax, but parallelize it (across all of your cores) automatically.
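A minimal PLINQ sketch for the average-over-z case described above; the flat array layout and the names here are my own assumptions, not the asker's actual data structure:
using System;
using System.Linq;

class PlinqAverageSketch
{
    static void Main()
    {
        const int X = 4, Y = 4, Z = 8;
        var random = new Random(1);

        // flat array laid out as [x, y, z] => index = (x * Y + y) * Z + z
        double[] samples = Enumerable.Range(0, X * Y * Z)
                                     .Select(_ => random.NextDouble())
                                     .ToArray();

        // one average per (x, y) cell, computed in parallel across the cores
        double[] averages = Enumerable.Range(0, X * Y)
            .AsParallel()
            .AsOrdered()                  // keep the results in (x, y) order
            .Select(cell => samples.Skip(cell * Z).Take(Z).Average())
            .ToArray();

        Console.WriteLine($"{averages.Length} averages computed, first = {averages[0]:F3}");
    }
}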
There are many reasons why it makes sense to use something like PLINQ (as mentioned by Reed) or Parallel.For, as implementing a low-overhead scheduler for distributing jobs over several CPUs is a bit challenging.
So, if I understood you correctly, maybe this could get you started (on my 4-core machine the parallel version is 3 times faster than the single-core version):
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
class Program
{
static void AverageOfZ (
double[] input,
double[] result,
int x,
int y,
int z
)
{
Debug.Assert(input.Length == x*y*z);
Debug.Assert(result.Length == x*y);
//Replace Parallel with Sequential to compare with non-parallel loop
//Sequential.For(
Parallel.For(
0,
x*y,
i =>
{
var begin = i*z;
var end = begin + z;
var sum = 0.0;
for (var iter = begin; iter < end; ++iter)
{
sum += input[iter];
}
result[i] = sum/z;
});
}
static void Main(string[] args)
{
const int X = 64;
const int Y = 64;
const int Z = 64;
const int Repetitions = 40000;
var random = new Random(19740531);
var samples = Enumerable.Range(0, X*Y*Z).Select(x => random.NextDouble()).ToArray();
var result = new double[X*Y];
var then = DateTime.Now;
for (var iter = 0; iter < Repetitions; ++iter)
{
AverageOfZ(samples, result, X, Y, Z);
}
var diff = DateTime.Now - then;
Console.WriteLine(
"{0} samples processed {1} times in {2} seconds",
samples.Length,
Repetitions,
diff.TotalSeconds
);
}
}
static class Sequential
{
public static void For(int from, int to, Action<int> action)
{
for (var iter = from; iter < to; ++iter)
{
action(iter);
}
}
}
PS. When going for concurrent performance, it's important to consider how the different cores access memory, as it's very easy to get disappointing performance otherwise.
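To illustrate that point, here is a small sketch of the usual pattern: keep the hot loop in a local variable and write to the shared array only once per partition, so neighbouring cores are not repeatedly writing into the same cache line. This is my own illustration, not code from the answer above:
using System;
using System.Threading.Tasks;

class MemoryAccessSketch
{
    static void Main()
    {
        const int Partitions = 4;
        const int ItemsPerPartition = 10000000;
        var totals = new double[Partitions];

        Parallel.For(0, Partitions, p =>
        {
            double local = 0;                 // thread-local accumulator, stays out of shared memory
            for (int i = 0; i < ItemsPerPartition; i++)
            {
                local += i;
            }
            totals[p] = local;                // a single write to the shared array per partition
        });

        Console.WriteLine(totals[0]);
    }
}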
.NET 3.5 and onward introduced many shortcuts that abstract away the complexity of things like Parallel for multithreading or async for asynchronous IO. Unfortunately, this also provides no opportunity to understand what is involved in these tasks. For example, a colleague of mine was recently trying to use async for a login method which returned an authentication token.
Here is full-blown multi-threaded sample code for your scenario. To make it more realistic, the sample code pretends that:
X is Longitude
Y is Lattitude
and Z is Rainfall Samples at the coordinates
The sample code also follows the Unit of Work design pattern, where the rainfall samples at each coordinate become a work item. It also creates discrete foreground threads instead of using a background thread pool.
Due to the simplicity of the work item and the short compute time involved, I've split the thread synchronization into two locks: one for the work queue and one for the output data.
Note: I have not used any .NET shortcuts such as LINQ, so this code should run on .NET 2.0 as well.
In real-world app development, something like what's below would only be needed in complex scenarios, such as processing a continuous stream of work items, in which case you would also need output data buffers that are cleared regularly, as the threads would effectively run forever.
public static class MultiThreadSumRainFall
{
const int LongitudeSize = 64;
const int LattitudeSize = 64;
const int RainFallSamplesSize = 64;
const int SampleMinValue = 0;
const int SampleMaxValue = 1000;
const int ThreadCount = 4;
public static void SumRainfallAndOutputValues()
{
int[][][] SampleData;
SampleData = GenerateSampleRainfallData();
for (int Longitude = 0; Longitude < LongitudeSize; Longitude++)
{
for (int Lattitude = 0; Lattitude < LattitudeSize; Lattitude++)
{
QueueWork(new WorkItem(Longitude, Lattitude, SampleData[Longitude][Lattitude]));
}
}
System.Threading.ThreadStart WorkThreadStart;
System.Threading.Thread WorkThread;
List<System.Threading.Thread> RunningThreads;
WorkThreadStart = new System.Threading.ThreadStart(ParallelSum);
int NumThreads;
NumThreads = ThreadCount;
if (ThreadCount < 1)
{
NumThreads = 1;
}
else if (NumThreads > (Environment.ProcessorCount + 1))
{
NumThreads = Environment.ProcessorCount + 1;
}
OutputData = new int[LongitudeSize, LattitudeSize];
RunningThreads = new List<System.Threading.Thread>();
for (int I = 0; I < NumThreads; I++)
{
WorkThread = new System.Threading.Thread(WorkThreadStart);
WorkThread.Start();
RunningThreads.Add(WorkThread);
}
bool AllThreadsComplete;
AllThreadsComplete = false;
while (!AllThreadsComplete)
{
System.Threading.Thread.Sleep(100);
AllThreadsComplete = true;
foreach (System.Threading.Thread WorkerThread in RunningThreads)
{
if (WorkerThread.IsAlive)
{
AllThreadsComplete = false;
}
}
}
for (int Longitude = 0; Longitude < LongitudeSize; Longitude++)
{
for (int Lattitude = 0; Lattitude < LattitudeSize; Lattitude++)
{
Console.Write(string.Concat(OutputData[Longitude, Lattitude], @" "));
}
Console.WriteLine();
}
}
private class WorkItem
{
public WorkItem(int _Longitude, int _Lattitude, int[] _RainFallSamples)
{
Longitude = _Longitude;
Lattitude = _Lattitude;
RainFallSamples = _RainFallSamples;
}
public int Longitude { get; set; }
public int Lattitude { get; set; }
public int[] RainFallSamples { get; set; }
}
public static int[][][] GenerateSampleRainfallData()
{
int[][][] Result;
Random Rnd;
Rnd = new Random();
Result = new int[LongitudeSize][][];
for(int Longitude = 0; Longitude < LongitudeSize; Longitude++)
{
Result[Longitude] = new int[LattitudeSize][];
for (int Lattidude = 0; Lattidude < LattitudeSize; Lattidude++)
{
Result[Longitude][Lattidude] = new int[RainFallSamplesSize];
for (int Sample = 0; Sample < RainFallSamplesSize; Sample++)
{
Result[Longitude][Lattidude][Sample] = Rnd.Next(SampleMinValue, SampleMaxValue);
}
}
}
return Result;
}
private static object SyncRootWorkQueue = new object();
private static Queue<WorkItem> WorkQueue = new Queue<WorkItem>();
private static void QueueWork(WorkItem SamplesWorkItem)
{
lock(SyncRootWorkQueue)
{
WorkQueue.Enqueue(SamplesWorkItem);
}
}
private static WorkItem DeQueueWork()
{
WorkItem Samples;
Samples = null;
lock (SyncRootWorkQueue)
{
if (WorkQueue.Count > 0)
{
Samples = WorkQueue.Dequeue();
}
}
return Samples;
}
private static int QueueSize()
{
lock(SyncRootWorkQueue)
{
return WorkQueue.Count;
}
}
private static object SyncRootOutputData = new object();
private static int[,] OutputData;
private static void SetOutputData(int Longitude, int Lattitude, int SumSamples)
{
lock(SyncRootOutputData)
{
OutputData[Longitude, Lattitude] = SumSamples;
}
}
private static void ParallelSum()
{
WorkItem SamplesWorkItem;
int SummedResult;
SamplesWorkItem = DeQueueWork();
while (SamplesWorkItem != null)
{
SummedResult = 0;
foreach (int SampleValue in SamplesWorkItem.RainFallSamples)
{
SummedResult += SampleValue;
}
SetOutputData(SamplesWorkItem.Longitude, SamplesWorkItem.Lattitude, SummedResult);
SamplesWorkItem = DeQueueWork();
}
}
}
