I am learning about writing concurrent data structures and looking at the ConcurrentStack implementation as a learning exercise. As a starting point I have created a copy of the ConcurrentStack implementation by using IlSpy to decompile it into C#. I have limited myself to investigating and using just the Push and TryPop methods for the time being.
But my implementation is significantly slower than using the original.
My testing uses 4 threads (on a single socket, 4 core CPU) with thread affinity for each thread against a different core. Each thread performs 1,000,000 loops and each loop does three pushes and three pops. Running the testing many times the average time to complete all the threads is...
ConcurrentStack => 445ms
Clone of Push/TryPop => 670ms
So even though the code, as far as I can tell, is identical between the two the clone is about 50% slower. I run the testing 500 times in a run and take the average over all the runs. So I do not believe the issue is the initial JIT'ing of the code.
Any ideas why a copy of the methods would be so much slower?
C# Implementation
(For the sake of completeness, I have provided the C# console app code that can be used to replicate the result, for anyone interested in seeing whether they get the same result as me.)
/// <summary>
/// Benchmark driver: for every runner type it repeatedly creates one runner per
/// logical processor, times a full start/join/check cycle, and prints the
/// average elapsed milliseconds per cycle.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        int coreCount = Environment.ProcessorCount;
        Console.WriteLine("Processors: {0}", coreCount);

        // Implementations under comparison, measured in this order.
        Type[] runnerTypes = { typeof(ThreadRunnerConcurrent), typeof(ThreadRunnerCASStack) };
        const int cycles = 500;

        foreach (Type runnerType in runnerTypes)
        {
            long totalMs = 0;
            for (int cycle = 0; cycle < cycles; cycle++)
            {
                // Build one runner per processor and tell it which processor index it owns.
                var batch = new List<ThreadRunner>();
                for (int core = 0; core < coreCount; core++)
                {
                    var created = Activator.CreateInstance(runnerType) as ThreadRunner;
                    created.Processor = core;
                    batch.Add(created);
                }

                // The timed region spans starting the threads, joining them, and the final check.
                Stopwatch timer = Stopwatch.StartNew();
                foreach (ThreadRunner r in batch)
                {
                    r.Start();
                }
                foreach (ThreadRunner r in batch)
                {
                    r.Join();
                }
                foreach (ThreadRunner r in batch)
                {
                    r.Check();
                }
                timer.Stop();

                totalMs += timer.ElapsedMilliseconds;
            }

            // Integer division: the reported average is truncated to whole milliseconds.
            Console.WriteLine("{0} Average: {1}ms", runnerType.Name, (totalMs / cycles));
        }

        Console.WriteLine("Finished");
        Console.ReadLine();
    }
}
/// <summary>
/// Base class for a benchmark worker. Each instance owns one thread which pins
/// itself to the processor given by <see cref="Processor"/> and then runs the
/// derived class's workload for 1,000,000 cycles.
/// </summary>
abstract class ThreadRunner
{
    private Thread _thread;

    public ThreadRunner()
    {
    }

    /// <summary>Zero-based processor index used to build the affinity mask.</summary>
    public int Processor { get; set; }

    /// <summary>Creates the worker thread and starts it.</summary>
    public void Start()
    {
        // Explicit delegate type selects the Run(object) overload.
        ParameterizedThreadStart entryPoint = new ParameterizedThreadStart(Run);
        _thread = new Thread(entryPoint);
        _thread.Start();
    }

    /// <summary>Blocks until the worker thread has finished.</summary>
    public void Join()
    {
        _thread.Join();
    }

    /// <summary>Post-run verification implemented by the derived class.</summary>
    public abstract void Check();

    /// <summary>The workload; <paramref name="cycles"/> is the number of loop iterations.</summary>
    protected abstract void Run(int cycles);

    // Thread entry point: the argument is unused (Start passes none).
    private void Run(object param)
    {
        SetAffinity();
        Run(1000000);
    }

    // Restricts the calling OS thread to the single processor selected by Processor.
    private void SetAffinity()
    {
#pragma warning disable 618
        int nativeThreadId = AppDomain.GetCurrentThreadId();
#pragma warning restore 618

        // Locate the ProcessThread with the same OS thread id; Single throws if there is not exactly one.
        ProcessThread self = Process.GetCurrentProcess()
                                    .Threads
                                    .Cast<ProcessThread>()
                                    .Single(t => t.Id == nativeThreadId);

        // One bit per processor: bit N set means "may run on processor N".
        long mask = 1L << Processor;
        self.ProcessorAffinity = new IntPtr(mask);
    }
}
/// <summary>
/// Benchmark worker that exercises the framework's ConcurrentStack: every cycle
/// performs three pushes and three successful pops, interleaved.
/// </summary>
class ThreadRunnerConcurrent : ThreadRunner
{
    // Static, so every runner instance works against the same stack.
    private static ConcurrentStack<int> _stack = new ConcurrentStack<int>();

    protected override void Run(int cycles)
    {
        int popped;
        int iteration = 0;
        while (iteration < cycles)
        {
            _stack.Push(iteration);
            _stack.Push(iteration);
            // Each pop is retried until it succeeds.
            while (!_stack.TryPop(out popped)) { }
            _stack.Push(iteration);
            while (!_stack.TryPop(out popped)) { }
            while (!_stack.TryPop(out popped)) { }
            iteration++;
        }
    }

    /// <summary>Reports when items are still on the stack after a run.</summary>
    public override void Check()
    {
        if (_stack.Count <= 0)
        {
            return;
        }
        Console.WriteLine("ThreadRunnerConcurrent has entries!");
    }
}
/// <summary>
/// Benchmark worker that exercises the hand-copied CASStack: every cycle
/// performs three pushes and three successful pops, interleaved.
/// </summary>
class ThreadRunnerCASStack : ThreadRunner
{
    // Static, so every runner instance works against the same stack.
    private static CASStack<int> _stack = new CASStack<int>();

    protected override void Run(int cycles)
    {
        int popped;
        int iteration = 0;
        while (iteration < cycles)
        {
            _stack.Push(iteration);
            _stack.Push(iteration);
            // Each pop is retried until it succeeds.
            while (!_stack.TryPop(out popped)) { }
            _stack.Push(iteration);
            while (!_stack.TryPop(out popped)) { }
            while (!_stack.TryPop(out popped)) { }
            iteration++;
        }
    }

    /// <summary>Reports when items are still on the stack after a run.</summary>
    public override void Check()
    {
        if (_stack.Count <= 0)
        {
            return;
        }
        Console.WriteLine("ThreadRunnerCASStack has entries!");
    }
}
/// <summary>
/// Lock-free LIFO stack built on a singly linked list whose head is swapped
/// with Interlocked.CompareExchange. Push and TryPop first attempt a single
/// compare-and-swap and fall back to a spinning retry loop when it fails.
/// </summary>
/// <typeparam name="T">Type of the stored items.</typeparam>
class CASStack<T>
{
    // Immutable value plus a link to the node below it on the stack.
    private class Node
    {
        internal readonly T m_value;
        internal Node m_next;

        internal Node(T value)
        {
            m_value = value;
            m_next = null;
        }
    }

    // Top of the stack; null when the stack is empty.
    private volatile Node m_head;

    /// <summary>
    /// Number of items currently on the stack, obtained by walking the list
    /// from the head. Under concurrent modification this is only a snapshot.
    /// </summary>
    public int Count
    {
        get
        {
            int count = 0;
            for (Node node = m_head; node != null; node = node.m_next)
            {
                count++;
            }
            return count;
        }
    }

    /// <summary>Pushes <paramref name="item"/> onto the top of the stack.</summary>
    public void Push(T item)
    {
        Node node = new Node(item);
        node.m_next = m_head;

        // Fast path: a single compare-and-swap succeeds when nobody raced us.
        if (Interlocked.CompareExchange<Node>(ref m_head, node, node.m_next) == node.m_next)
        {
            return;
        }
        PushCore(node, node);
    }

    // Slow path: spin, re-link the tail to the current head, and retry until the swap succeeds.
    private void PushCore(Node head, Node tail)
    {
        SpinWait spinner = default(SpinWait);
        do
        {
            spinner.SpinOnce();
            tail.m_next = m_head;
        }
        while (Interlocked.CompareExchange<Node>(ref m_head, head, tail.m_next) != tail.m_next);
    }

    /// <summary>
    /// Attempts to remove the top item.
    /// </summary>
    /// <param name="result">The popped value, or default(T) when the stack was empty.</param>
    /// <returns>true when an item was popped; false when the stack was empty.</returns>
    public bool TryPop(out T result)
    {
        Node head = m_head;
        if (head == null)
        {
            result = default(T);
            return false;
        }

        // Fast path: a single compare-and-swap succeeds when nobody raced us.
        if (Interlocked.CompareExchange<Node>(ref m_head, head.m_next, head) == head)
        {
            result = head.m_value;
            return true;
        }
        return TryPopCore(out result);
    }

    // Slow path for a single item: delegates to the counted overload.
    private bool TryPopCore(out T result)
    {
        Node popped;
        if (TryPopCore(1, out popped) == 1)
        {
            result = popped.m_value;
            return true;
        }
        result = default(T);
        return false;
    }

    // Detaches up to 'count' nodes from the top in one compare-and-swap.
    // Returns the number detached (0 when the stack is empty); 'poppedHead'
    // receives the first detached node. Failed attempts back off: the spin
    // count doubles until SpinWait is about to yield, then becomes random 1..7.
    private int TryPopCore(int count, out Node poppedHead)
    {
        SpinWait spinner = default(SpinWait);
        int backoff = 1;
        Random random = new Random(Environment.TickCount & 2147483647);

        while (true)
        {
            Node head = m_head;
            if (head == null)
            {
                poppedHead = null;
                return 0;
            }

            // Walk down to the last node that will be detached.
            Node last = head;
            int taken = 1;
            while (taken < count && last.m_next != null)
            {
                last = last.m_next;
                taken++;
            }

            if (Interlocked.CompareExchange<Node>(ref m_head, last.m_next, head) == head)
            {
                poppedHead = head;
                return taken;
            }

            for (int i = 0; i < backoff; i++)
            {
                spinner.SpinOnce();
            }
            backoff = spinner.NextSpinWillYield ? random.Next(1, 8) : (backoff * 2);
        }
    }
}
#endregion
ConcurrentStack<T> has one advantage that your CASStack<T> doesn't have, even though the code for both is identical.
ConcurrentStack<T> has an ngen'd native image on your computer, which was compiled when you installed the .NET Framework. Your CASStack<T> is compiled by the JIT, and because the JIT has to be fast, it does not perform as many optimizations as the ahead-of-time compiler in ngen.
I tested your code on my computer. Without ngen'ing your image, I got these results:
Processors: 4
ThreadRunnerConcurrent Average: 764ms
ThreadRunnerCASStack Average: 948ms
Finished
After ngening:
Processors: 4
ThreadRunnerConcurrent Average: 778ms
ThreadRunnerCASStack Average: 742ms
Finished
Related
What does C# SemaphoreSlim guarantee? Does it act as a full memory barrier? What can we be sure of about the code between Wait() and Release() calls on two different semaphores?
Does a sequence of different SemaphoreSlim Wait() and Release() calls, Interlocked methods, and Volatile.Write/Read always keep its order as observed by every thread?
// Takes one item from the buffer: waits on the "can read" semaphore, claims an
// index by atomically decrementing 'end', reads that slot, then releases the
// "can write" semaphore.
public T Dequeue()
{
// Blocks until the read semaphore can be entered.
canReadCountSemaphoreSlim.Wait();
// Atomic decrement; the returned (already decremented) value is the slot index.
int i = Interlocked.Decrement(ref end);
// NOTE(review): this abbreviated snippet indexes buf with i directly; the full
// listing wraps the index with a modulo first — presumably omitted here for brevity.
T val = Volatile.Read(ref buf[i]);
canWriteCountSemaphoreSlim.Release();
return val;
}
// Stores one item in the buffer: waits on the "can write" semaphore, claims an
// index by atomically decrementing 'start', writes the slot, then releases the
// "can read" semaphore.
public void Enqueue(T val)
{
// Blocks until the write semaphore can be entered.
canWriteCountSemaphoreSlim.Wait();
// Atomic decrement; the returned (already decremented) value is the slot index.
int i = Interlocked.Decrement(ref start);
// NOTE(review): this abbreviated snippet indexes buf with i directly; the full
// listing wraps the index with a modulo first — presumably omitted here for brevity.
Volatile.Write(ref buf[i], val);
canReadCountSemaphoreSlim.Release();
}
Full code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading;
namespace Program
{
/// <summary>
/// Fixed-capacity blocking queue over a circular array. Two counting
/// semaphores track how many slots may be read and how many may be written;
/// slot indices are claimed with Interlocked.Decrement and wrapped into range.
/// </summary>
/// <remarks>
/// NOTE(review): a producer releases the read semaphore after writing its own
/// slot, while consumers claim slots strictly in index order. With more than
/// one producer, a consumer can therefore be admitted for a slot whose producer
/// has claimed it but not yet written it — confirm single-producer use.
/// </remarks>
public class BlockingRingQueue<T> where T : class
{
    const int BUFSIZE_LOG2 = 10;
    const int BUFSIZE = 1 << BUFSIZE_LOG2;

    T[] buf = new T[BUFSIZE];

    // Both cursors only ever move downwards; they are reduced modulo BUFSIZE on use.
    int start = 0;
    int end = 0;

    // Initially nothing can be read and every slot can be written.
    SemaphoreSlim canReadCountSemaphoreSlim = new SemaphoreSlim(0);
    SemaphoreSlim canWriteCountSemaphoreSlim = new SemaphoreSlim(BUFSIZE);

    /// <summary>Removes and returns the oldest item, blocking while the queue is empty.</summary>
    public T Dequeue()
    {
        canReadCountSemaphoreSlim.Wait();

        int slot = PositiveMod(Interlocked.Decrement(ref end), BUFSIZE);
        T item = Volatile.Read(ref buf[slot]);

        canWriteCountSemaphoreSlim.Release();
        return item;
    }

    /// <summary>Appends <paramref name="val"/>, blocking while the queue is full.</summary>
    public void Enqueue(T val)
    {
        canWriteCountSemaphoreSlim.Wait();

        int slot = PositiveMod(Interlocked.Decrement(ref start), BUFSIZE);
        Volatile.Write(ref buf[slot], val);

        canReadCountSemaphoreSlim.Release();
    }

    // Remainder of a / b mapped into [0, b) for positive b, even when a is negative.
    static int PositiveMod(int a, int b)
    {
        int remainder = a % b;
        return remainder < 0 ? remainder + b : remainder;
    }
}
/// <summary>
/// Demo for BlockingRingQueue: one producer thread enqueues the characters of
/// "ABCD" followed by one empty-string sentinel per consumer; each consumer
/// prints what it dequeues and stops at the first empty string.
/// </summary>
public class Program
{
    const int READ_THREAD_COUNT = 3;

    static BlockingRingQueue<string> queue = new BlockingRingQueue<string>();

    public static void Main(string[] args)
    {
        var producer = new Thread(() => Pushing("ABCD")) { Name = "0" };
        producer.Start();

        for (int id = 1; id <= READ_THREAD_COUNT; id++)
        {
            var consumer = new Thread(Poping) { Name = id.ToString() };
            consumer.Start();
        }
    }

    /// <summary>Consumer loop: dequeue and print until an empty string arrives.</summary>
    public static void Poping()
    {
        for (;;)
        {
            RandSpinWait();
            string val = queue.Dequeue();
            if ("" == val)
            {
                break;
            }
            Console.WriteLine(val + Thread.CurrentThread.Name + ' ');
        }
        //Console.WriteLine('!' + Thread.CurrentThread.Name + ' ');
    }

    /// <summary>Producer: enqueues each character as a string, then the sentinels.</summary>
    public static void Pushing(string chars)
    {
        RandSpinWait();

        var payload = chars.ToCharArray().Select(c => $"{c}");
        var sentinels = Enumerable.Repeat("", READ_THREAD_COUNT);

        foreach (string v in payload.Concat(sentinels))
        {
            queue.Enqueue(v);
        }
    }

    // NOTE(review): Random.Next(1) has an exclusive upper bound of 1, so the
    // spin count passed here is always 0 — confirm whether a larger bound was intended.
    public static void RandSpinWait()
    {
        Thread.SpinWait(new Random().Next(1));
    }
}
}
Here is some of my Code for a plugin on a game, if anyone could help it would be greatly appreciated.
namespace Oxide.Plugins
{
    // Count(predicate) is the LINQ extension method Enumerable.Count; without
    // this directive the compiler only sees the list's Count property and
    // reports that it "cannot be used as method or delegate".
    using System.Linq;

    /// <summary>
    /// Periodically pads BasePlayer.activePlayerList with null entries ("fake
    /// players") while the number of valid players is below a threshold, and
    /// removes them again once the threshold is reached.
    /// </summary>
    [Info("Fake Player Add Plugin", "NOT asmr", 0.0)]
    class FakePlayerAddPlugin : CSharpPlugin
    {
        public int MaximumFakePlayer = 50; //if server has more than x legit players, dont add fakes.
        public int PlayerFakeAdd = 43; //how many fake players to add
        public float PlayerCheckDelay = 120f; //how often to check for player changes in seconds

        void Loaded()
        {
            Puts("FakePlayerAddPlugin Loaded!");
        }

        // Time.realtimeSinceStartup value at which the last check ran.
        float lasttime = 0f;

        // Runs the check at most once every PlayerCheckDelay seconds.
        void OnFrame(float delta)
        {
            if (lasttime + PlayerCheckDelay > Time.realtimeSinceStartup) return;
            lasttime = Time.realtimeSinceStartup;

            // Null entries are the fake players, so they must be skipped before
            // IsValid() is called on an entry.
            var pcount = BasePlayer.activePlayerList?.Count(n => n != null && n.IsValid()) ?? 0;
            var fcount = BasePlayer.activePlayerList?.Count(n => n == null) ?? 0;

            if (pcount >= MaximumFakePlayer)
            {
                RemoveFakePlayers();
                return;
            }

            if (PlayerFakeAdd > 0 && fcount != PlayerFakeAdd)
            {
                RemoveFakePlayers();
                AddFakePlayers(PlayerFakeAdd);
            }
        }

        /// <summary>Removes every null entry from the active player list.</summary>
        public void RemoveFakePlayers()
        {
            // OnFrame already tolerates a null list; do the same here.
            if (BasePlayer.activePlayerList == null) return;
            BasePlayer.activePlayerList.RemoveAll(n => n == null);
        }

        /// <summary>Appends <paramref name="amount"/> null entries to the active player list.</summary>
        public void AddFakePlayers(int amount)
        {
            if (BasePlayer.activePlayerList == null) return;
            for (int i = 0; i < amount; i++)
            {
                BasePlayer.activePlayerList.Add(null);
            }
        }
    }
}
"The member `System.Collections.Generic.List.Count' cannot be used as method or delegate" is what I am getting back.
You may think this class is strange, but it is for an educational exercise, so I have to do it this way.
/// <summary>
/// Three-place token pool guarded by a lock. Tokens move from place 0 to
/// place 1 on start() and from place 1 to place 2 on stop(); a separate
/// counter limits how many tokens may sit in place 1 at the same time.
/// </summary>
class petriPool
{
    private int[] nodes = new int[3];
    private int counter;
    private readonly object _lock = new object();

    /// <param name="n">Initial number of tokens in place 0.</param>
    /// <param name="m">Initial value of the counter.</param>
    public petriPool(int n, int m)
    {
        nodes[0] = n;
        nodes[1] = 0;
        nodes[2] = 0;
        counter = m;
    }

    /// <summary>
    /// Moves one token from place 0 to place 1 and decrements the counter.
    /// </summary>
    /// <returns>false when place 0 is empty or the counter is zero; otherwise true.</returns>
    public bool start()
    {
        lock (_lock)
        {
            if (nodes[0] == 0 || counter == 0)
            {
                return false;
            }

            nodes[0]--;
            nodes[1]++;
            counter--;
            return true;
        }
    }

    /// <summary>
    /// Moves one token from place 1 to place 2 and increments the counter.
    /// </summary>
    /// <returns>false when place 1 is empty; otherwise true.</returns>
    public bool stop()
    {
        lock (_lock)
        {
            if (nodes[1] == 0)
            {
                return false;
            }

            nodes[1]--;
            nodes[2]++;
            counter++;
            return true;
        }
    }
}
I need to make the start() method wait until counter has a value greater than 0. I can do it like this:
// Waits (polling every 10 ms) until the counter is non-zero, then moves one
// token from place 0 to place 1 and decrements the counter.
// Returns false when a slot is free but place 0 is empty; otherwise true.
//
// The counter is only ever examined while holding _lock. Reading it outside
// the lock is a data race: the loop may never observe another thread's update,
// and a different thread can take the free slot between the loop exiting and
// the lock being acquired.
public bool start() {
    while (true) {
        lock (_lock) {
            if (counter != 0) {
                if (this.nodes[0] == 0)
                    return false;
                this.nodes[0]--;
                this.nodes[1]++;
                counter--;
                return true;
            }
        }
        // No free slot: sleep outside the lock so stop() can make progress.
        Thread.Sleep(10);
    }
}
But isn't there a better solution? It seems I could waste less time sleeping.
To explain what this is needed for: I call start() before starting a thread and stop() at the end of the thread, so counter must reflect the maximum number of threads running at the same time.
Signalling like this is done by using Event classes. In your case, ManualResetEventSlim should be enough.
You can Wait on it instead of polling in the while-loop, Set it when the counter becomes greater than zero (in stop()), and Reset it when the counter drops back to zero.
You can consider using `ManualResetEvent` for communicating between two threads.
Following untested code might help.
/// <summary>
/// Three-place token pool in which start() blocks until a slot is free.
/// Tokens move from place 0 to place 1 on start() and from place 1 to place 2
/// on stop(); the counter limits how many tokens may be in place 1 at once.
/// </summary>
/// <remarks>
/// The event is changed only while holding the lock and is kept signalled
/// exactly while the counter is greater than zero.
/// </remarks>
class petriPool
{
    private int[] nodes = new int[3];
    private int counter;
    private ManualResetEvent mevent;
    private readonly object _lock = new object();

    /// <param name="n">Initial number of tokens in place 0.</param>
    /// <param name="m">Initial number of free slots.</param>
    public petriPool(int n, int m)
    {
        // Start signalled when there is already a free slot.
        mevent = new ManualResetEvent(m > 0);
        this.nodes[0] = n;
        this.nodes[1] = 0;
        this.nodes[2] = 0;
        this.counter = m;
    }

    /// <summary>
    /// Waits for a free slot, then moves one token from place 0 to place 1.
    /// </summary>
    /// <returns>false when a slot is free but place 0 is empty; otherwise true.</returns>
    public bool start()
    {
        while (true)
        {
            // Sleep until stop() reports a free slot.
            mevent.WaitOne();

            lock (_lock)
            {
                if (counter <= 0)
                {
                    // Another waiter took the slot first; wait again.
                    continue;
                }
                if (this.nodes[0] == 0)
                {
                    return false;
                }

                this.nodes[0]--;
                this.nodes[1]++;
                counter--;
                if (counter == 0)
                {
                    // Last slot taken: later callers must block.
                    mevent.Reset();
                }
                return true;
            }
        }
    }

    /// <summary>
    /// Moves one token from place 1 to place 2, frees a slot and wakes waiters.
    /// </summary>
    /// <returns>false when place 1 is empty; otherwise true.</returns>
    public bool stop()
    {
        lock (_lock)
        {
            if (this.nodes[1] == 0)
            {
                return false;
            }

            this.nodes[1]--;
            this.nodes[2]++;
            counter++;
            if (counter > 0)
            {
                mevent.Set();
            }
            return true;
        }
    }
}
To clarify I'm doing this in Unity3D, which may or may not be important?
I'm trying to figure out if I can pass a value by ref to an IEnumerator function that does not yield. If I try to do it with one that yields, VS2010 complains ("Iterators cannot have ref or out parameters"), but, if I wrap the call up with a similar IEnumerator function that calls the yielding function, but does not yield itself, the error goes away and things appear to work. I'm trying to find out if I'm in unexpected behavior land or if this is normal behavior.
Here's an example of what I'm doing:
// Ordinary (non-iterator) method that merely returns an IEnumerator, which is
// why it is allowed to take a ref parameter.
IEnumerator Wrapper(ref int value)
{
    // This is the ONLY place where the caller's variable is modified.
    int next = value + 1;
    value = next;

    // Foo receives a copy, so it cannot change the ref parameter.
    return Foo(next);
}
// Iterator block: the compiler turns it into a state-machine class, so it
// cannot take ref or out parameters. 'value' is a plain by-value copy.
IEnumerator Foo(int value)
{
    // blah blah
    someFunc(value);
    someSlowFunc();
    yield return null;
    yield return null;
}
Looks good. The top function just returns an IEnumerator - but is otherwise a normal function. The bottom function is an IEnumerator [transformed into a funky class by the compiler] and as such cannot have a ref value.
The top function could have been written as such:
// Same wrapper expressed with an out parameter instead of a return value:
// it increments the caller's variable and hands back the enumerator from Foo.
void Wrapper(ref int value, out IEnumerator coroutine)
{
    // Pre-increment yields the updated value, which Foo receives as a copy.
    coroutine = Foo(++value);
}
This is a little messier, but it shows that this is a normal function that deals with two pieces of data: an int passed by reference, and an IEnumerator [just a class] that it returns [in this example by using out].
Supplemental: This is how stuff works behind the scenes:
// Walks through three ways of consuming a sequence: driving the enumerator by
// hand, using foreach, and chaining LINQ operators.
static void Main(string[] args)
{
    // RandomNum returns the IEnumerable object the compiler generates for the iterator.
    var sequence = RandomNum(10, 10);

    // An IEnumerable is just something with GetEnumerator(); ask it for its enumerator.
    var cursor = sequence.GetEnumerator();

    // Manual iteration: advance, then read Current.
    while (cursor.MoveNext())
    {
        Console.WriteLine(cursor.Current);
    }
    Console.WriteLine(new String('-', 10));

    // foreach does the same work as the loop above with far less code.
    foreach (var random in RandomNum(10, 10))
    {
        Console.WriteLine(random);
    }
    Console.WriteLine(new String('-', 10));

    // The same idea underlies Unity3D coroutines and LINQ, which chains enumerables heavily.
    var firstFiveEven = Enumerable.Range(0, 100)
                                  .Where(x => x % 2 == 0)
                                  .Take(5)
                                  .ToList();
    firstFiveEven.ForEach(Console.WriteLine);

    Console.ReadLine();
}
// Shared random source for the iterator below.
static Random rnd = new Random();

// Lazily yields 'count' random integers, each in the range [0, max).
// The values are produced one at a time as the caller iterates.
static IEnumerable<int> RandomNum(int max, int count)
{
    for (int i = 0; i < count; i++)
    {
        // The upper bound is 'max'; the loop index only counts the items.
        yield return rnd.Next(max);
    }
}
//This is an example of what the compiler generates for RandomNum, see how boring it is?
// Hand-written illustration of the IEnumerable class the compiler produces for
// RandomNum: it only remembers the arguments and creates a fresh enumerator on
// every GetEnumerator() call.
public class RandomNumIEnumerableCompiled : IEnumerable<int>
{
    int max, count;
    Random _rnd;

    public RandomNumIEnumerableCompiled(int max, int count)
    {
        this.max = max;
        this.count = count;
        // 'rnd' is not a parameter here; it is resolved from the enclosing scope.
        _rnd = rnd;
    }

    // Non-generic entry point; forwards to the generic implementation.
    IEnumerator IEnumerable.GetEnumerator()
    {
        return ((IEnumerable<int>)this).GetEnumerator();
    }

    // Each call returns an independent enumerator that uses the captured random source.
    IEnumerator<int> IEnumerable<int>.GetEnumerator()
    {
        return new RandomNumIEnumeratorCompiled(max, count, _rnd);
    }
}
// Hand-written illustration of the enumerator the compiler produces for
// RandomNum: yields 'count' random integers in [0, max), one per MoveNext().
public class RandomNumIEnumeratorCompiled : IEnumerator<int>
{
    int max, count;
    Random _rnd;
    int current;
    int currentCount = 0;

    public RandomNumIEnumeratorCompiled(int max, int count, Random rnd)
    {
        this.max = max;
        this.count = count;
        _rnd = rnd;
    }

    int IEnumerator<int>.Current { get { return current; } }

    object IEnumerator.Current { get { return current; } }

    // Produces the next value; returns false once 'count' values have been produced.
    public bool MoveNext()
    {
        if (currentCount >= count)
        {
            return false;
        }

        currentCount++;
        // Use the instance's own random source, i.e. the one given to the constructor.
        current = _rnd.Next(max);
        return true;
    }

    // Restarts the count; the last produced value is left in place.
    public void Reset() { currentCount = 0; }

    public void Dispose() { }
}
What would the MSDN sample look like without the yield keyword? You may use any example if you prefer. I would just like to understand what is going on under the hood.
Is the yield operator eagerly or lazily evaluated?
Sample:
using System;
using System.Collections;
// Sample showing an iterator method: Power yields number^1 .. number^exponent.
public class List
{
    // Iterator block: each power is computed only when the caller asks for the
    // next element, and nothing is yielded when exponent is zero or negative.
    public static IEnumerable Power(int number, int exponent)
    {
        int result = 1;
        for (int step = 0; step < exponent; step++)
        {
            result *= number;
            yield return result;
        }
    }

    static void Main()
    {
        // Display powers of 2 up to the exponent 8:
        IEnumerable powers = Power(2, 8);
        foreach (int i in powers)
        {
            Console.Write("{0} ", i);
        }
    }
}
MSDN - Yield Keyword
If the yield operator is eagerly evaluated here is my guess:
// Eager variant of Power: computes every power up front, stores them in a
// list, and returns the filled list. Nothing is deferred to iteration time.
public static IEnumerable Power(int number, int exponent)
{
    int counter = 0;
    int result = 1;
    // The list must be created before use; an unassigned local does not compile.
    List<int> powers = new List<int>();
    while (counter++ < exponent)
    {
        result = result * number;
        powers.Add(result);
    }
    return powers;
}
I have no clue what it might look like if the yield operator is lazily evaluated.
Update: Reflector gives this:
// Decompiler (Reflector) outline of the sample after compilation. This is
// signature-only pseudo-code, not compilable C#: names such as <Power>d__0 are
// compiler-generated and are not legal identifiers in source.
public class List
{
// Methods
public List();
private static void Main();
public static IEnumerable Power(int number, int exponent);
// Nested Types
// The class generated for the Power iterator. Per its base list it is both
// the enumerable and the enumerator.
[CompilerGenerated]
private sealed class <Power>d__0 : IEnumerable<object>, IEnumerable, IEnumerator<object>, IEnumerator, IDisposable
{
// Fields
// <>1__state: current position in the state machine (switched on in MoveNext).
private int <>1__state;
// <>2__current: the value most recently yielded.
private object <>2__current;
// <>3__*: copies of the arguments, transferred to 'number'/'exponent' by GetEnumerator.
public int <>3__exponent;
public int <>3__number;
// Thread id compared against the calling thread in GetEnumerator.
private int <>l__initialThreadId;
// <counter>5__1 / <result>5__2: the iterator's locals, hoisted into fields.
public int <counter>5__1;
public int <result>5__2;
public int exponent;
public int number;
// Methods
[DebuggerHidden]
public <Power>d__0(int <>1__state);
private bool MoveNext();
[DebuggerHidden]
IEnumerator<object> IEnumerable<object>.GetEnumerator();
[DebuggerHidden]
IEnumerator IEnumerable.GetEnumerator();
[DebuggerHidden]
void IEnumerator.Reset();
void IDisposable.Dispose();
// Properties
object IEnumerator<object>.Current { [DebuggerHidden] get; }
object IEnumerator.Current { [DebuggerHidden] get; }
}
}
// Decompiled GetEnumerator of the generated iterator class.
// Returns an enumerator in state 0 with the saved arguments copied into the
// working fields 'number' and 'exponent'.
IEnumerator<object> IEnumerable<object>.GetEnumerator()
{
List.<Power>d__0 d__;
// Reuse this very object when called on the thread recorded in
// <>l__initialThreadId while the state is still -2.
if ((Thread.CurrentThread.ManagedThreadId == this.<>l__initialThreadId) && (this.<>1__state == -2))
{
this.<>1__state = 0;
d__ = this;
}
else
{
// Otherwise hand out a separate instance, created directly in state 0.
d__ = new List.<Power>d__0(0);
}
// Copy the saved arguments into the fields that MoveNext reads.
d__.number = this.<>3__number;
d__.exponent = this.<>3__exponent;
return d__;
}
// Decompiled MoveNext of the generated iterator: the body of Power rewritten
// as a state machine. Returns true after storing the next value in
// <>2__current, and false when the loop has finished or the state is neither 0 nor 1.
// Note: the jump into the middle of the while loop is how the decompiler
// renders the IL; a goto into a loop body is not legal in C# source.
private bool MoveNext()
{
switch (this.<>1__state)
{
// State 0: first call. Initialise the hoisted locals and enter the loop.
case 0:
this.<>1__state = -1;
this.<counter>5__1 = 0;
this.<result>5__2 = 1;
while (this.<counter>5__1++ < this.exponent)
{
this.<result>5__2 *= this.number;
// "yield return result": publish the value, remember to resume at state 1, and return.
this.<>2__current = this.<result>5__2;
this.<>1__state = 1;
return true;
// Resume point: execution continues here on the call after a yield.
Label_0065:
this.<>1__state = -1;
}
break;
// State 1: a value was yielded by the previous call; jump back into the loop.
case 1:
goto Label_0065;
}
return false;
}
First off, yield is not an operator. yield return and yield break are statements.
There are plenty of articles available on how the compiler implements iterator blocks. Start by reading the C# specification section on iterator blocks; it gives some suggestions for how an implementer of C# might want to go about it.
Next read Raymond Chen's series "The implementation of iterators in C# and its consequences"
http://www.bing.com/search?q=raymond+chen+the+implementation+of+iterators
Next, read Jon Skeet's book chapter on the subject:
http://csharpindepth.com/Articles/Chapter6/IteratorBlockImplementation.aspx
If after all that you are still interested then read my series on the design factors that went into this feature:
http://blogs.msdn.com/b/ericlippert/archive/tags/iterators/
Back in the good old days, before we had the yield keyword, we used to write classes which implemented IEnumerator.
/// <summary>
/// Hand-written enumerator that yields number^1, number^2, ... number^exponent,
/// computing each power only when MoveNext() is called.
/// </summary>
class PowerEnumerator : IEnumerator<int>
{
    private readonly int _number;
    private readonly int _initialExponent; // kept so Reset() can start over
    private int _exponent;                 // powers still to produce; negative once exhausted
    private int _current = 1;

    public PowerEnumerator(int number, int exponent)
    {
        _number = number;
        _initialExponent = exponent;
        _exponent = exponent;
    }

    /// <summary>Advances to the next power.</summary>
    /// <returns>true when a new value is available; false when the sequence is finished.</returns>
    public bool MoveNext()
    {
        if (_exponent <= 0)
        {
            // Mark the enumerator as exhausted so that Current throws from now on.
            _exponent = -1;
            return false;
        }

        _current *= _number;
        _exponent--;
        return true;
    }

    /// <summary>The most recently computed power.</summary>
    /// <exception cref="InvalidOperationException">The enumerator has moved past the last element.</exception>
    public int Current
    {
        get
        {
            if (_exponent < 0) throw new InvalidOperationException();
            return _current;
        }
    }

    // Non-generic view of Current, required by IEnumerator<int>.
    object System.Collections.IEnumerator.Current
    {
        get { return Current; }
    }

    /// <summary>Returns the enumerator to its state before the first MoveNext().</summary>
    public void Reset()
    {
        _exponent = _initialExponent;
        _current = 1;
    }

    /// <summary>Nothing to release.</summary>
    public void Dispose()
    {
    }
}
Or something like that. It wasn't fun, let me tell you.
Let .NET Reflector decompile it. It's a generic solution (a state machine, actually), but quite complex: more than 20 lines of code, if I remember correctly.
Lazy. That's the point why yield can be quite efficient.
It would be a custom implementation of IEnumerable<T>, not leaning on an existing implementation such as List<T>
Lazily.
More info available here.