// Divides the range starting at 0 into noOfThreads blocks of blockSize values and
// starts one dedicated thread per block; each thread appends its primes to results.
static void Main(string[] args)
{
// Started here; the visible code never stops or reads this stopwatch.
var sw = new Stopwatch();
sw.Start();
int noOfThreads = Environment.ProcessorCount;
//int minVal = 1;
int maxVal = 10000000;
int blockSize = maxVal / noOfThreads;
List<Thread> threads = new List<Thread>();
List<List<int>> results = new List<List<int>>();
object thisLock = new object();
for (int i = 0; i < noOfThreads; ++i)
{
// This lock is taken by the thread running Main while it creates and starts the
// worker; the worker's results.Add call below never acquires it.
lock(thisLock)
{
// The lambda captures the loop variable i itself, so the bounds are computed from
// whatever value i holds when the thread body actually runs.
Thread th = new Thread(() =>
{
results.Add(GetPrimeNumbers(i * blockSize, i * blockSize + blockSize));
});
th.Start();
threads.Add(th);
}
}
// Block until every worker thread has finished.
foreach (var elem in threads)
elem.Join();
}
/// <summary>
/// Collects, in ascending order, every value in the inclusive range
/// [<paramref name="low"/>, <paramref name="high"/>] for which IsPrime returns true.
/// </summary>
/// <param name="low">First candidate to test.</param>
/// <param name="high">Last candidate to test (inclusive).</param>
/// <returns>A new list holding the accepted values; empty when none qualify.</returns>
private static List<int> GetPrimeNumbers(int low, int high)
{
    var primes = new List<int>();
    int candidate = low;
    while (candidate <= high)
    {
        if (IsPrime(candidate))
        {
            primes.Add(candidate);
        }
        ++candidate;
    }
    return primes;
}
/// <summary>
/// Determines whether <paramref name="number"/> is a prime by trial division.
/// </summary>
/// <param name="number">Value to test; anything below 2 is reported as not prime.</param>
/// <returns><c>true</c> when the value is prime; otherwise <c>false</c>.</returns>
static bool IsPrime(int number)
{
    // 0, 1 and negative values are not prime by definition.
    if (number < 2)
    {
        return false;
    }

    // 2 is the only even prime.
    if (number % 2 == 0)
    {
        return number == 2;
    }

    // Only odd divisors up to the square root need to be tried.
    var topLimit = (int)Math.Sqrt(number);
    for (int i = 3; i <= topLimit; i += 2)
    {
        if (number % i == 0)
        {
            return false;
        }
    }
    return true;
}
With the above code, I was expecting that when I put a breakpoint in GetPrimeNumbers(int low, int high) I would see a range of values for low and high, e.g. (0, 1250000), (1250000, 2500000), ..., (8750000, 10000000). But what I am observing is that certain blocks get passed multiple times, e.g. (2500000, 3750000), while others are not passed at all, e.g. (0, 1250000), and this behaviour also matches the results I am getting.
I am curious why I am seeing this behaviour. Is there a way to prevent this?
I am aware that I can use Parallel.For(), and with it I do see the expected behaviour at the breakpoint in GetPrimeNumbers(int low, int high). But as I mentioned before, I am curious why I am seeing the former behaviour.
Thanks in advance!
The problem is that a for loop reuses the same i variable across iterations, and your thread delegate is closing over that variable.
There are various ways to fix this. A simple one is to use a new variable declared within your loop:
// Same loop as in the question, but each iteration copies i into a fresh local so
// every thread's lambda captures its own variable.
for (int i = 0; i < noOfThreads; ++i)
{
int j = i; // capture the value
// NOTE: this lock is still only taken by the creating thread; the results.Add call
// inside the thread body does not acquire it.
lock(thisLock)
{
Thread th = new Thread(() =>
{
results.Add(GetPrimeNumbers(j * blockSize, j * blockSize + blockSize));
});
th.Start();
threads.Add(th);
}
}
This still has other issues, though. I'd recommend something more like this:
// Lets PLINQ distribute the block indices and flatten the per-block prime lists into
// a single list, so no hand-managed threads or shared result list are needed.
// noOfThreads is the block count declared in the question's Main.
var allPrimeNumbers = Enumerable.Range(0, noOfThreads)
    .AsParallel()
    .SelectMany(i => GetPrimeNumbers(i * blockSize, i * blockSize + blockSize))
    .ToList();
Further Reading
Is there a reason for C#'s reuse of the variable in a foreach?
StriplingWarrior had it close, but as mentioned in the comments, you still have a threading bug. You need to move the lock inside the Thread action. Also, to get the best performance, hold the lock for the shortest amount of time possible, which is when modifying the shared results variable. To do that I separated the GetPrimeNumbers call from the results.Add call.
for (int i = 0; i < noOfThreads; ++i)
{
    int j = i; // capture the value
    Thread th = new Thread(() =>
    {
        // Do the expensive work outside the lock so the workers actually run in parallel.
        List<int> result = GetPrimeNumbers(j * blockSize, j * blockSize + blockSize);

        // Hold the lock only while touching the shared list.
        lock (thisLock)
        {
            results.Add(result);
        }
    });
    th.Start();
    threads.Add(th);
}
Also, unless you really need to manage your own threads I would recommend using Tasks (TPL) instead. Here is a modification using Tasks
// One task per block; each task returns its own list, so no lock is required.
var tasks = new List<Task<List<int>>>();
for (int i = 0; i < noOfThreads; ++i)
{
    int j = i; // capture the value
    tasks.Add(Task.Run(() => GetPrimeNumbers(j * blockSize, j * blockSize + blockSize)));
}

// Task.WaitAll takes an array of tasks, not a List.
Task.WaitAll(tasks.ToArray());

// Every task has completed at this point, so reading Result does not block.
results = tasks.Select(t => t.Result).ToList();
Related
/// <summary>
/// Returns a new array holding the elements of <paramref name="arr"/> in ascending
/// order. The input array is left unmodified.
/// </summary>
/// <param name="arr">Array to sort; must not be null.</param>
/// <returns>A sorted copy of <paramref name="arr"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="arr"/> is null.</exception>
private static int[] Sort(int[] arr)
{
    if (arr == null)
    {
        throw new ArgumentNullException(nameof(arr));
    }

    // Sort a copy so the caller's array keeps its original order.
    var result = (int[])arr.Clone();

    // Library sort replaces the hand-written quadratic exchange sort.
    Array.Sort(result);
    return result;
}
/// <summary>
/// Splits <paramref name="source"/> into consecutive sub-lists of at most
/// <paramref name="chunkSize"/> elements, preserving element order. Only the final
/// chunk may be shorter than <paramref name="chunkSize"/>.
/// </summary>
/// <param name="source">List to split; must not be null.</param>
/// <param name="chunkSize">Maximum number of elements per chunk; must be positive.</param>
/// <returns>The chunks in order; an empty list when <paramref name="source"/> is empty.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
public static List<List<T>> ChunkBy<T>(this List<T> source, int chunkSize)
{
    if (source == null)
    {
        throw new System.ArgumentNullException(nameof(source));
    }
    if (chunkSize <= 0)
    {
        // Previously a zero size surfaced as a DivideByZeroException from inside the query.
        throw new System.ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be positive.");
    }

    var chunks = new List<List<T>>(source.Count / chunkSize + 1);
    for (int start = 0; start < source.Count; start += chunkSize)
    {
        // The last chunk takes whatever is left over.
        int length = System.Math.Min(chunkSize, source.Count - start);
        chunks.Add(source.GetRange(start, length));
    }
    return chunks;
}
...
// Splits the input into chunks, creates one task per chunk to sort it, starts all
// tasks, then awaits them in order.
List<List<int>> unsortedLists = list.ChunkBy(chunkSize);
int count = unsortedLists.Count;
// Created empty; nothing in the visible code adds elements before lists[i] is assigned.
List<List<int>> lists = new List<List<int>>();
Task<List<int>>[] tasks = new Task<List<int>>[count];
//from 0 to 9
for (int i = 0; i < count; i++)
{
// The lambda captures the loop variable i itself and only reads it once the task runs,
// which is after this loop has finished.
tasks[i] = new Task<List<int>>(() =>
{
lists[i] = Sort(unsortedLists[i].ToArray()).ToList(); //out of range exception i = 10
return lists[i];
});
}
for(int i = 0; i < count; i++)
{
tasks[i].Start();
}
for(int i = 0; i < count; i++)
{
lists[i] = await tasks[i]; //array out of range
}
I'm facing extremely weird error. I have slightly simplified my code and provided the important part.
I'm creating a bunch of tasks, starting them, and then it fails instantly at the 3rd loop. For some reason it uses the old iterator that ended on 9 and increases it to 10, causing the exception. I have no idea what to do; at the moment it looks like an error in the environment.
You should never use the task constructor. In this case, use Task.Run instead.
Your other problem is that the for loop in C# only defines a single loop variable, and lambdas close over variables, not values. This is why you're seeing an i that is 10.
The simplest solution is to do away with the for loops completely:
// Each lambda receives its own i as a Select parameter, so nothing is shared between
// tasks. Every task simply returns its sorted chunk instead of writing into the
// shared (and initially empty) lists collection from several threads.
var tasks = Enumerable.Range(0, count)
    .Select(i => Task.Run(() => Sort(unsortedLists[i].ToArray()).ToList()))
    .ToList();

// WhenAll yields the results in the same order as the tasks were supplied.
var results = await Task.WhenAll(tasks);

// Fill the list on the awaiting side, once all of the work is done.
lists.AddRange(results);
I have a Func like this:
int loopMax = 10, taskMax = 10;
// Counts how many times the delegate body has been entered.
int executionCounter = 0;
// Slow calculation: sleeps 100 ms per iteration and returns the sum 0 + 1 + ... + (loopMax - 1).
Func<int> calculator = new Func<int>(() =>
{
// Plain increment of a captured local; no synchronization is visible here.
executionCounter++;
int result = 0;
for (int i = 0; i < loopMax; i++)
{
Thread.Sleep(100);
// Intended as an overflow guard: reset the sum before it would reach int.MaxValue.
if (result + i >= int.MaxValue)
result = 0;
result += i;
}
return result;
});
Which could be called by multiple threads. For example like this:
Task[] tasks = new Task[taskMax];
for (int i = 0; i < taskMax; i++)
{
tasks[i] = Task.Run(() => _=calculator());
}
Task.WaitAll(tasks);
I need to share the calculator function across all threads and make sure it is called only once. In other words, the value of executionCounter after running this code should remain 1, and all of the threads should get the same return value.
UPDATE 1
I think I can solve it if I find a way to serve the first thread and block all other threads, and then, once the first thread's method call has completed, pass the method's result to the other threads and cancel them, to prevent them from calling calculator again.
Using lock inside the method also is not what I am looking for, because in that case again the calculator is being called multiple times...
It seems that you need the Lazy<T> class. This class provides support for lazy initialization. Here is how you could use it:
Lazy<int> lazyCalculator = new Lazy<int>(calculator);
Task[] tasks = new Task[taskMax];
for (int i = 0; i < taskMax; i++)
{
tasks[i] = Task.Run(() => _ = lazyCalculator.Value);
}
Task.WaitAll(tasks);
When a Lazy instance is constructed, it can take an optional LazyThreadSafetyMode argument. The default value of this argument is ExecutionAndPublication, with the behavior described below:
Locks are used to ensure that only a single thread can initialize a Lazy<T> instance in a thread-safe manner.
It looks like what you want is that your Calculator method can be called by any thread, but its body should be executed only once. If that is the case, we can use the lock statement.
The purpose of lock statement is:
The lock statement acquires the mutual-exclusion lock for a given
object, executes a statement block, and then releases the lock
An example:
static object lockCalculatorMethod = new object();
static int executionCounter = 0;
static int loopMax = 10;
static int taskMax = 10;
// Starts taskMax tasks that all call Calculator and blocks until every one has finished.
static void Main(string[] args)
{
    Task[] tasks = new Task[taskMax];
    for (int i = 0; i < taskMax; i++)
    {
        tasks[i] = Task.Run(() => _ = Calculator());
    }

    // WaitAll blocks the calling thread. WhenAll only returns a Task, and discarding
    // that Task would let Main return before the work has completed.
    Task.WaitAll(tasks);
}
and Calculator method:
/// <summary>
/// Runs the slow calculation for the first caller only. Callers are serialized by
/// lockCalculatorMethod, and every caller after the first gets -1 without recalculating.
/// </summary>
/// <returns>
/// The sum 0 + 1 + ... + (loopMax - 1) for the first caller; -1 for all later callers.
/// </returns>
static int Calculator()
{
    lock (lockCalculatorMethod)
    {
        // Someone has already done the work; do not repeat it.
        if (executionCounter >= 1)
        {
            return -1;
        }

        executionCounter++;
        int result = 0;
        for (int i = 0; i < loopMax; i++)
        {
            Thread.Sleep(100);

            // Overflow guard written so that the check itself cannot overflow.
            if (result >= int.MaxValue - i)
            {
                result = 0;
            }

            // Accumulate on every iteration, not only when the guard fires.
            result += i;
        }
        return result;
    }
}
UPDATE:
If you want to cache result and avoid recalculation while calling by other threads, then you can use threadSafe collection ConcurrentQueue<T> and just get items from this collection:
static object lockCalculatorMethod = new object();
static ConcurrentQueue<int> queue = new ConcurrentQueue<int>();
static int executionCounter = 0;
static int loopMax = 7;
static int taskMax = 7;
// Launches taskMax tasks that each print the value returned by Calculator, then
// blocks until all of them have completed.
static void Main(string[] args)
{
    Action printResult = () => Console.WriteLine(Calculator());

    var pending = new Task[taskMax];
    for (int slot = 0; slot < pending.Length; slot++)
    {
        pending[slot] = Task.Run(printResult);
    }

    Task.WaitAll(pending);
}
And Calculator method:
/// <summary>
/// Computes the sum once, under lockCalculatorMethod, and stores it in the queue.
/// Every caller, including the first, then returns the value at the head of the queue.
/// </summary>
/// <returns>The cached sum 0 + 1 + ... + (loopMax - 1).</returns>
static int Calculator()
{
    lock (lockCalculatorMethod)
    {
        if (executionCounter < 1)
        {
            executionCounter++;

            int sum = 0;
            int step = 0;
            while (step < loopMax)
            {
                Thread.Sleep(100);
                sum += step;
                step++;
            }

            queue.Enqueue(sum);
        }
    }

    // Peek rather than dequeue so the value stays available for every other caller.
    int cached;
    queue.TryPeek(out cached);
    return cached;
}
I have a Dictionary<int, int> which is populated with ~5 million records.
While the performance is reasonably good considering the volume of data I'm looking to improve it. I don't care about data population my main concern is data retrieval.
First thing I'd done - I changed value type from decimal to int which got me twice better performance.
Then I tried trading 'genericness' for speed by passing non-generic IntComparer into Dictionary's ctor as follows:
/// <summary>
/// Non-generic equality comparer for <see cref="int"/> keys: two values are equal
/// when they are the same number, and a value is its own hash code.
/// </summary>
public class IntegerComparer : IEqualityComparer<int>
{
    /// <summary>Returns true when both integers hold the same value.</summary>
    public bool Equals(int x, int y) => x == y;

    /// <summary>Returns the integer itself as its hash code.</summary>
    public int GetHashCode(int obj) => obj;
}
but to no avail, performance got degraded by 20%. SortedDictionary slowed things down by 10 times (didn't have much hope on it though). Wonder what can be done for improving the performance if any?
here's a synthetic test just for measuring performance:
// Fill the dictionary with 5,000,000 entries mapping i to i + 5.
var d = new Dictionary<int, int>();
for (var i = 0; i < 5000000; i++)
{
d.Add(i, i + 5);
}
var r = new Random();
var s = new Stopwatch();
s.Start();
for (var i = 0; i < 100000; i++)
{
// r0 is a deferred query; the 255 random keys are only generated when ToList below
// enumerates it, so random-number generation and list allocation are timed as well.
var r0 = Enumerable.Range(1, 255).Select(t => r.Next(5000000));
var values = r0.Select(t => d[t]).ToList();
}
s.Stop();
MessageBox.Show(s.ElapsedMilliseconds.ToString());
As the comments point out your test is seriously flawed...
If the highest index you will see is 5,000,000 then an array will be the most performant option. I've tried to quickly rewrite your test to try and eliminate some of the error. There will probably be mistakes; writing accurate benchmarks is hard.
// Times loopLength pseudo-random lookups against a Dictionary<int, int> and then
// against an int[] holding the same data, printing the elapsed milliseconds of each.
static void Main(string[] args)
{
var loopLength = 100000000;
var d = new Dictionary<int, int>();
for (var i = 0; i < 5000000; i++)
{
d.Add(i, i + 5);
}
// Single read before timing starts (presumably a warm-up; the value is not used).
var ignore = d[7];
var a = new int[5000000];
for (var i = 0; i < 5000000; i++)
{
a[i] = i + 5;
}
ignore = a[7];
var s = new Stopwatch();
var x = 1;
s.Start();
for (var i = 0; i < loopLength; i++)
{
// Linear congruential step; the mask 4194303 (2^22 - 1) keeps x within
// [0, 4194303], which is below the 5,000,000 populated entries.
x = (x * 1664525 + 1013904223) & (4194303);
var y = d[x];
}
s.Stop();
Console.WriteLine(s.ElapsedMilliseconds);
// Reset the stopwatch and the generator so the array run sees the same index sequence.
s.Reset();
x = 1;
s.Start();
for (var i = 0; i < loopLength; i++)
{
x = (x * 1664525 + 1013904223) & (4194303);
var y = a[x];
}
s.Stop();
Console.WriteLine(s.ElapsedMilliseconds);
Console.ReadKey(true);
}
x coefficients borrowed from Wikipedia's Linear congruential generator article
My results:
24390
2076
That makes the array over 12x faster
The operation is to multiply every i-th element of a array (call it A) and i-th element of a matrix of the same size(B), and update the same i-th element of A with the value earned.
In a arithmetic formula,
A'[i] = A[i]*B[i] (0 < i < n(A))
What's the best way to optimize this operation in a multi-core environment?
Here's my current code;
var learningRate = 0.001f;
var m = 20000;
var n = 40000;
// Both matrices are stored flattened in row-major order: element (i, j) is at i * n + j.
// NOTE(review): m * n is 800,000,000 floats per array; confirm the runtime permits
// arrays of that size.
var W = new float[m * n];
var C = new float[m * n];
//my current code ...[1]
Parallel.ForEach(Enumerable.Range(0, m), i =>
{
    for (int j = 0; j <= n - 1; j++)
    {
        W[i * n + j] *= C[i * n + j];
    }
});
//This is somehow far slower than [1], but I don't know why ... [2]
Parallel.ForEach(Enumerable.Range(0, n * m), i =>
{
    W[i] *= C[i];
});
//This is faster than [2], but not as fast as [1] ... [3]
for (int i = 0; i < m * n; i++)
{
    W[i] *= C[i];
}
Tested the following method. But the performance didn't get better at all.
http://msdn.microsoft.com/en-us/library/dd560853.aspx
// Compares two parallel element-wise multiplications of W by C over `loops` rounds:
// a row-wise Parallel.ForEach (sw1) and a range-partitioned Parallel.ForEach (sw2).
// Prints the average milliseconds per round for each variant.
public static void Test1()
{
// rnd is not used anywhere in the visible code.
Random rnd = new Random(1);
var sw1 = new Stopwatch();
var sw2 = new Stopwatch();
sw1.Reset();
sw2.Reset();
int m = 10000;
int n = 20000;
int loops = 20;
var W = DummyDataUtils.CreateRandomMat1D(m, n);
var C = DummyDataUtils.CreateRandomMat1D(m, n);
for (int l = 0; l < loops; l++)
{
// v and b are created every round but not read by the visible code.
var v = DummyDataUtils.CreateRandomVector(n);
var b = DummyDataUtils.CreateRandomVector(m);
// The stopwatches are started and stopped without a reset inside the loop, so the
// elapsed time accumulates over all rounds.
sw1.Start();
Parallel.ForEach(Enumerable.Range(0, m), i =>
{
for (int j = 0; j <= n - 1; j++)
{
W[i*n+j] *= C[i*n+j];
}
});
sw1.Stop();
sw2.Start();
// Partition the entire source array.
var rangePartitioner = Partitioner.Create(0, n*m);
// Loop over the partitions in parallel.
Parallel.ForEach(rangePartitioner, (range, loopState) =>
{
// Loop over each range element without a delegate invocation.
for (int i = range.Item1; i < range.Item2; i++)
{
W[i] *= C[i];
}
});
sw2.Stop();
Console.Write("o");
}
// Average per round = accumulated time divided by the number of rounds.
var t1 = (double)sw1.ElapsedMilliseconds / loops;
var t2 = (double)sw2.ElapsedMilliseconds / loops;
Console.WriteLine("t1: " + t1);
Console.WriteLine("t2: " + t2);
}
Result:
t1: 119
t2: 120.4
The problem is that while invoking a delegate is relatively fast, it adds up when you invoke it many times and the code inside the delegate is very simple.
What you could try instead is to use a Partitioner to specify the range you want to iterate, which allows you to iterate over many items for each delegate invocation (similar to what you're doing in [1]):
// Each delegate invocation receives a whole index range (Item1 inclusive, Item2
// exclusive, as the loop bounds below use them) and processes it with a plain for
// loop, instead of one delegate call per element.
Parallel.ForEach(Partitioner.Create(0, n * m), partition =>
{
for (int i = partition.Item1; i < partition.Item2; i++)
{
W[i] *= C[i];
}
});
I was trying out a code sample from this book that should demonstrate that the post decrement operator is not atomic. The code is as I have entered it into LinqPad.
// Repeatedly starts 100 threads that each decrement the shared field _x 100 times
// without synchronization, and tries again for as long as _x ends up at exactly 0.
void Main() {
var count = 0;
do {
_x = 10000;
for (int i = 0; i < 100; i++) {
new Thread(Go).Start();
}
// The threads are never joined; this sleep is the only wait before _x is inspected.
Thread.Sleep(1000);
Console.WriteLine("Try "+ count);
count++;
} while (_x == 0);
Console.WriteLine(_x);
}
// Shared counter decremented by every thread.
int _x = 10000;
// Performs 100 unsynchronized decrements of _x.
void Go() { for (int i = 0; i < 100; i++) _x--; }
The idea is that decrementing _x in parallel on multiple threads without locking may lead to a value of _x other than 0 when all the threads have finished.
My problem is that no matter how long I seem to try I always get 0 as a result.
I have run the code on two different computers (both Windows 7) and two different versions of .NET and both give me the same result.
What am I missing here?
I have added 100000 iterations in Go, as Lasse V. Karlsen suggested. The code now works as expected on the first try. I have also moved the thread creation out of the loop and reduced the thread count, as Henk Holterman suggested.
// Revised experiment: 10 threads each decrement the shared field _x 100000 times
// without synchronization; the loop repeats for as long as _x ends up at exactly 0.
void Main()
{
var count = 0;
do {
_x = 1000000;
// Create all thread objects first, then start them in a separate pass.
var threads = Enumerable.Range(0,10).Select (_ => new Thread(Go)).ToList();
foreach (var t in threads)
{
t.Start();
}
// The threads are never joined; this sleep is the only wait before _x is inspected.
Thread.Sleep(1000);
Console.WriteLine("Try "+ count);
count++;
} while (_x == 0);
Console.WriteLine(_x);
}
// Shared counter decremented by every thread.
int _x;
// Performs 100000 unsynchronized decrements of _x.
void Go() { for (int i = 0; i < 100000; i++) _x--; }
The code now works as expected.