I have been looking around for a ConcurrentList implementation and found this one:
http://deanchalk.com/2010/10/c-fast-parallel-concurrentlistt-implementation/
The code works fine as given on that page, but when I change the DoWork method:
original:
static void DoWork(ICollection<int> list, int count)
{
for (var i = 0; i < count; i++)
{
list.Add(i);
// use spinwait to emulate work but avoiding
// context switching
Thread.SpinWait(100000);
}
}
changes made:
static void DoWork(ICollection<int> list, int count)
{
for (var i = 0; i < count; i++)
{
var foo = list.Any(x => x == 2); //<-- throws exception.. (Enumeration changed)
if (foo)
{
//do something...
}
list.Add(i);
}
}
This change eventually breaks execution, with an exception telling me that the enumeration has been modified.
How can I make this code work?
---- Concurrent List ----
public class ConcurrentList<T> : IList<T>, IList
{
private readonly List<T> underlyingList = new List<T>();
private readonly object syncRoot = new object();
private readonly ConcurrentQueue<T> underlyingQueue;
private bool requiresSync;
private bool isDirty;
public ConcurrentList()
{
underlyingQueue = new ConcurrentQueue<T>();
}
public ConcurrentList(IEnumerable<T> items)
{
underlyingQueue = new ConcurrentQueue<T>(items);
}
private void UpdateLists()
{
if (!isDirty)
return;
lock (syncRoot)
{
requiresSync = true;
T temp;
while (underlyingQueue.TryDequeue(out temp))
underlyingList.Add(temp);
requiresSync = false;
}
}
public IEnumerator<T> GetEnumerator()
{
lock (syncRoot)
{
UpdateLists();
return underlyingList.GetEnumerator();
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public void Add(T item)
{
if (requiresSync)
lock (syncRoot)
underlyingQueue.Enqueue(item);
else
underlyingQueue.Enqueue(item);
isDirty = true;
}
public int Add(object value)
{
if (requiresSync)
lock (syncRoot)
underlyingQueue.Enqueue((T)value);
else
underlyingQueue.Enqueue((T)value);
isDirty = true;
lock (syncRoot)
{
UpdateLists();
return underlyingList.IndexOf((T)value);
}
}
public bool Contains(object value)
{
lock (syncRoot)
{
UpdateLists();
return underlyingList.Contains((T)value);
}
}
public int IndexOf(object value)
{
lock (syncRoot)
{
UpdateLists();
return underlyingList.IndexOf((T)value);
}
}
public void Insert(int index, object value)
{
lock (syncRoot)
{
UpdateLists();
underlyingList.Insert(index, (T)value);
}
}
public void Remove(object value)
{
lock (syncRoot)
{
UpdateLists();
underlyingList.Remove((T)value);
}
}
public void RemoveAt(int index)
{
lock (syncRoot)
{
UpdateLists();
underlyingList.RemoveAt(index);
}
}
T IList<T>.this[int index]
{
get
{
lock (syncRoot)
{
UpdateLists();
return underlyingList[index];
}
}
set
{
lock (syncRoot)
{
UpdateLists();
underlyingList[index] = value;
}
}
}
object IList.this[int index]
{
get { return ((IList<T>)this)[index]; } // cast through IList<T>; casting to IList here would recurse forever
set { ((IList<T>)this)[index] = (T)value; }
}
public bool IsReadOnly
{
get { return false; }
}
public bool IsFixedSize
{
get { return false; }
}
public void Clear()
{
lock (syncRoot)
{
UpdateLists();
underlyingList.Clear();
}
}
public bool Contains(T item)
{
lock (syncRoot)
{
UpdateLists();
return underlyingList.Contains(item);
}
}
public void CopyTo(T[] array, int arrayIndex)
{
lock (syncRoot)
{
UpdateLists();
underlyingList.CopyTo(array, arrayIndex);
}
}
public bool Remove(T item)
{
lock (syncRoot)
{
UpdateLists();
return underlyingList.Remove(item);
}
}
public void CopyTo(Array array, int index)
{
lock (syncRoot)
{
UpdateLists();
underlyingList.CopyTo((T[])array, index);
}
}
public int Count
{
get
{
lock (syncRoot)
{
UpdateLists();
return underlyingList.Count;
}
}
}
public object SyncRoot
{
get { return syncRoot; }
}
public bool IsSynchronized
{
get { return true; }
}
public int IndexOf(T item)
{
lock (syncRoot)
{
UpdateLists();
return underlyingList.IndexOf(item);
}
}
public void Insert(int index, T item)
{
lock (syncRoot)
{
UpdateLists();
underlyingList.Insert(index, item);
}
}
}
---- MainWindow.xaml.cs ----
public MainWindow()
{
InitializeComponent();
Console.WriteLine(#"standard List<T< - 10000 work items");
var list1 = new ConcurrentList<int>();
var start1 = DateTime.Now.Ticks;
DoWork(list1, 10000);
var end1 = DateTime.Now.Ticks;
var c1 = list1.Count; // accesses list
var cend1 = DateTime.Now.Ticks;
Console.WriteLine();
Console.WriteLine(#"Work Time: {0} - milliseconds", (end1 - start1)
/ TimeSpan.TicksPerMillisecond);
Console.WriteLine(#"Get Count Time: {0} - milliseconds",
(cend1 - end1)
/ TimeSpan.TicksPerMillisecond);
Console.WriteLine();
Console.WriteLine();
Console.WriteLine(#"ConcurrentList<T> - 10000 work items on single thread");
var list2 = new ConcurrentList<int>();
var start2 = DateTime.Now.Ticks;
DoWork(list2, 10000);
var end2 = DateTime.Now.Ticks;
var c2 = list2.Count; // accesses list, update performed
var cend2 = DateTime.Now.Ticks;
Console.WriteLine();
Console.WriteLine(#"Work Time: {0} - milliseconds", (end2 - start2)
/ TimeSpan.TicksPerMillisecond);
Console.WriteLine(#"Get Count Time: {0} - milliseconds",
(cend2 - end2)
/ TimeSpan.TicksPerMillisecond);
Console.WriteLine();
Console.WriteLine();
Console.WriteLine(#"ConcurrentList<T> - 10000 work items on 4 parallel tasks");
var list3 = new ConcurrentList<int>();
var start3 = DateTime.Now.Ticks;
var tasks3 = Enumerable.Range(0, 47)
.Select(_ => Task.Factory.StartNew(() => DoWork(list3, 10000)))
.ToArray();
Task.WaitAll(tasks3);
var end3 = DateTime.Now.Ticks;
var c3 = list3.Count; // accesses list, update performed
var cend3 = DateTime.Now.Ticks;
Console.WriteLine();
Console.WriteLine(#"Work Time: {0} - milliseconds", (end3 - start3)
/ TimeSpan.TicksPerMillisecond);
Console.WriteLine(#"Get Count Time: {0} - milliseconds",
(cend3 - end3)
/ TimeSpan.TicksPerMillisecond);
Console.WriteLine();
}
static void DoWork(ConcurrentList<int> list, int count)
{
for (var i = 0; i < count; i++)
{
var foo = list.Any(x => x == 2); //<--- throws Exception (Enumeration changed)
if (foo)
{
}
list.Add(Rnd.Next(int.MinValue, int.MaxValue)); // Rnd: a shared static Random instance (declared elsewhere)
// use spinwait to emulate work but avoiding
// context switching
//Thread.SpinWait(100000);
}
}
This isn't actually about multithreading.
Here's the implementation of Any (the parameterless overload; the predicate overload enumerates the source the same way):
public static bool Any<TSource>(this IEnumerable<TSource> source) {
if (source == null) throw Error.ArgumentNull("source");
using (IEnumerator<TSource> e = source.GetEnumerator()) {
// Suppose that another thread added an item here
// And obviously you get an InvalidOperationException
if (e.MoveNext()) return true;
}
return false;
}
This code will throw the same exception:
var collection = new List<int>() { 0 };
foreach (var c in collection) {
collection.Add(c);
}
To make your collection thread-safe, you can return an enumerator over a copy of the original collection for every GetEnumerator call (this is how the collections in System.Collections.Concurrent behave):
public IEnumerator<T> GetEnumerator()
{
lock (syncRoot)
{
UpdateLists();
return new List<T>(underlyingList).GetEnumerator();
}
}
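With that change in place, the modified DoWork stops throwing: each call to Any enumerates its own private copy while concurrent Add calls keep feeding the queue. A minimal single-threaded sketch of the snapshot semantics:
var list = new ConcurrentList<int>();
list.Add(1);
list.Add(2);

// The foreach walks a snapshot of [1, 2], so adding during
// enumeration no longer throws InvalidOperationException:
foreach (var item in list)
{
    list.Add(item + 10);
}
Console.WriteLine(list.Count); // 4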
Related
I'm working on some old code that does not have IAsyncEnumerable, so I was trying to mimic IAsyncEnumerable behavior with IObservable. The issue is that IObservable is push-based and is giving results in random order.
Here is a very simplified version of what I want to achieve
public static class Program
{
public static void Main()
{
Test().Wait();
Console.ReadLine();
}
private static async Task Test()
{
var l = await GetFirstObservable()
.SelectMany(i =>
{
return GetSecondObservable()
.Select(s => Tuple.Create(i, s));
})
.Take(3)
.ToList();
foreach (var item in l)
{
Console.WriteLine(item);
}
}
private static async Task<T> Echo<T>(T value)
{
Console.WriteLine(value);
await Task.Delay(1);
return value;
}
private static IObservable<int> GetFirstObservable()
{
return Observable.Create(async (IObserver<int> observer, CancellationToken cancellationToken) =>
{
foreach (var i in new [] {1, 2, 3})
{
if (cancellationToken.IsCancellationRequested)
{
return;
}
var e = await Echo(i);
if (cancellationToken.IsCancellationRequested)
{
return;
}
observer.OnNext(e);
}
});
}
private static IObservable<string> GetSecondObservable()
{
return Observable.Create(async (IObserver<string> observer, CancellationToken cancellationToken) =>
{
foreach (var i in new[] { "A", "B" })
{
if (cancellationToken.IsCancellationRequested)
{
return;
}
var e = await Echo(i);
if (cancellationToken.IsCancellationRequested)
{
return;
}
observer.OnNext(e);
}
});
}
}
I want the following results in the console:
1
A
B
2
A
(1, A)
(1, B)
(2, A)
Instead, the first observable does not wait for the second observable to complete, and I get random results like:
1
A
2
A
3
B
A
B
(1, A)
(2, A)
(1, B)
Is it even possible to achieve this pull-like behavior, or should I just look for something else?
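One option worth sketching (an assumption on my part, not a verified drop-in): Rx's Concat operator, applied to a stream of inner observables, subscribes to each inner sequence only after the previous one completes. Since GetSecondObservable() is cold, its work is deferred until subscription, so the tuples come out strictly in order even though the outer observable still races ahead:
// Sketch: Concat serializes the inner subscriptions. The first
// observable still prints 1, 2, 3 eagerly, but each second observable
// only runs after the previous one completes, so the tuples arrive
// as (1, A), (1, B), (2, A), ...
var l = await GetFirstObservable()
    .Select(i => GetSecondObservable().Select(s => Tuple.Create(i, s)))
    .Concat()
    .Take(3)
    .ToList();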
I am processing a queue concurrently using an ActionBlock.
The one catch here is that when processing an item in the queue, I may want to wait until a dependency is satisfied by the processing of another item in the queue.
I think I should be able to do this with the TPL Dataflow library using linking, postponement, and release of postponement, but I'm not sure which constructs to use.
In pseudocode:
public class Item
{
public string Name { get; set; }
public List<string> DependsOn = new List<string>();
}
ActionBlock<Item> block = null;
block = new ActionBlock<Item>(o => {
if (!HasActionBlockProcessedAllDependencies(o.DependsOn))
{
// enqueue a callback when ALL dependencies have been completed
}
else
{
DoWork(o);
}
},
new ExecutionDataflowBlockOptions {
MaxDegreeOfParallelism = resourceProcessorOptions.MaximumProviderConcurrency
});
var items = new[]
{
new Item { Name = "Apple", DependsOn = { "Pear" } },
new Item { Name = "Pear" }
};
I am not sure if this will be helpful to you, but here is a custom DependencyTransformBlock class that knows about the dependencies between the items it receives, and processes each one only after its dependencies have been successfully processed. This custom block supports all the built-in functionality of a normal TransformBlock, except for the EnsureOrdered option.
The constructors of this class accept a Func<TInput, TKey> lambda for retrieving the key of each item, and a Func<TInput, IReadOnlyCollection<TKey>> lambda for retrieving its dependencies. The keys are expected to be unique; if a duplicate key is found, the block completes with failure.
In case of circular dependencies between items, the affected items will remain unprocessed. The TInput[] Unprocessed property allows retrieving the unprocessed items after the completion of the block. An item can also remain unprocessed if any of its dependencies is never supplied.
public class DependencyTransformBlock<TInput, TKey, TOutput> :
ITargetBlock<TInput>, ISourceBlock<TOutput>
{
private readonly ITargetBlock<TInput> _inputBlock;
private readonly IPropagatorBlock<Item, TOutput> _transformBlock;
private readonly object _locker = new object();
private readonly Dictionary<TKey, Item> _items;
private int _pendingCount = 1;
// The initial 1 represents the completion of the _inputBlock
private class Item
{
public TKey Key;
public TInput Input;
public bool HasInput;
public bool IsCompleted;
public HashSet<Item> Dependencies;
public HashSet<Item> Dependents;
public Item(TKey key) => Key = key;
}
public DependencyTransformBlock(
Func<TInput, Task<TOutput>> transform,
Func<TInput, TKey> keySelector,
Func<TInput, IReadOnlyCollection<TKey>> dependenciesSelector,
ExecutionDataflowBlockOptions dataflowBlockOptions = null,
IEqualityComparer<TKey> keyComparer = null)
{
if (transform == null)
throw new ArgumentNullException(nameof(transform));
if (keySelector == null)
throw new ArgumentNullException(nameof(keySelector));
if (dependenciesSelector == null)
throw new ArgumentNullException(nameof(dependenciesSelector));
dataflowBlockOptions =
dataflowBlockOptions ?? new ExecutionDataflowBlockOptions();
keyComparer = keyComparer ?? EqualityComparer<TKey>.Default;
_items = new Dictionary<TKey, Item>(keyComparer);
_inputBlock = new ActionBlock<TInput>(async input =>
{
var key = keySelector(input);
var dependencyKeys = dependenciesSelector(input);
bool isReadyForProcessing = true;
Item item;
lock (_locker)
{
if (!_items.TryGetValue(key, out item))
{
item = new Item(key);
_items.Add(key, item);
}
if (item.HasInput)
throw new InvalidOperationException($"Duplicate key ({key}).");
item.Input = input;
item.HasInput = true;
if (dependencyKeys != null && dependencyKeys.Count > 0)
{
item.Dependencies = new HashSet<Item>();
foreach (var dependencyKey in dependencyKeys)
{
if (!_items.TryGetValue(dependencyKey, out var dependency))
{
dependency = new Item(dependencyKey);
_items.Add(dependencyKey, dependency);
}
if (!dependency.IsCompleted)
{
item.Dependencies.Add(dependency);
if (dependency.Dependents == null)
dependency.Dependents = new HashSet<Item>();
dependency.Dependents.Add(item);
}
}
isReadyForProcessing = item.Dependencies.Count == 0;
}
if (isReadyForProcessing) _pendingCount++;
}
if (isReadyForProcessing)
{
await _transformBlock.SendAsync(item);
}
}, new ExecutionDataflowBlockOptions()
{
CancellationToken = dataflowBlockOptions.CancellationToken,
BoundedCapacity = 1
});
var middleBuffer = new BufferBlock<Item>(new DataflowBlockOptions()
{
CancellationToken = dataflowBlockOptions.CancellationToken,
BoundedCapacity = DataflowBlockOptions.Unbounded
});
_transformBlock = new TransformBlock<Item, TOutput>(async item =>
{
try
{
TInput input;
lock (_locker)
{
Debug.Assert(item.HasInput && !item.IsCompleted);
input = item.Input;
}
var result = await transform(input).ConfigureAwait(false);
lock (_locker)
{
item.IsCompleted = true;
if (item.Dependents != null)
{
foreach (var dependent in item.Dependents)
{
Debug.Assert(dependent.Dependencies != null);
var removed = dependent.Dependencies.Remove(item);
Debug.Assert(removed);
if (dependent.HasInput
&& dependent.Dependencies.Count == 0)
{
middleBuffer.Post(dependent);
_pendingCount++;
}
}
}
item.Input = default; // Cleanup
item.Dependencies = null;
item.Dependents = null;
}
return result;
}
finally
{
lock (_locker)
{
_pendingCount--;
if (_pendingCount == 0) middleBuffer.Complete();
}
}
}, dataflowBlockOptions);
middleBuffer.LinkTo(_transformBlock);
PropagateCompletion(_inputBlock, middleBuffer,
condition: () => { lock (_locker) return --_pendingCount == 0; });
PropagateCompletion(middleBuffer, _transformBlock);
PropagateFailure(_transformBlock, middleBuffer);
PropagateFailure(_transformBlock, _inputBlock);
}
// Constructor with synchronous lambda
public DependencyTransformBlock(
Func<TInput, TOutput> transform,
Func<TInput, TKey> keySelector,
Func<TInput, IReadOnlyCollection<TKey>> dependenciesSelector,
ExecutionDataflowBlockOptions dataflowBlockOptions = null,
IEqualityComparer<TKey> keyComparer = null) : this(
input => Task.FromResult(transform(input)),
keySelector, dependenciesSelector, dataflowBlockOptions, keyComparer)
{
if (transform == null) throw new ArgumentNullException(nameof(transform));
}
public TInput[] Unprocessed
{
get
{
lock (_locker) return _items.Values
.Where(item => item.HasInput && !item.IsCompleted)
.Select(item => item.Input)
.ToArray();
}
}
public Task Completion => _transformBlock.Completion;
public void Complete() => _inputBlock.Complete();
void IDataflowBlock.Fault(Exception ex) => _inputBlock.Fault(ex);
DataflowMessageStatus ITargetBlock<TInput>.OfferMessage(
DataflowMessageHeader header, TInput value, ISourceBlock<TInput> source,
bool consumeToAccept)
{
return _inputBlock.OfferMessage(header, value, source, consumeToAccept);
}
TOutput ISourceBlock<TOutput>.ConsumeMessage(DataflowMessageHeader header,
ITargetBlock<TOutput> target, out bool messageConsumed)
{
return _transformBlock.ConsumeMessage(header, target, out messageConsumed);
}
bool ISourceBlock<TOutput>.ReserveMessage(DataflowMessageHeader header,
ITargetBlock<TOutput> target)
{
return _transformBlock.ReserveMessage(header, target);
}
void ISourceBlock<TOutput>.ReleaseReservation(DataflowMessageHeader header,
ITargetBlock<TOutput> target)
{
_transformBlock.ReleaseReservation(header, target);
}
public IDisposable LinkTo(ITargetBlock<TOutput> target,
DataflowLinkOptions linkOptions)
{
return _transformBlock.LinkTo(target, linkOptions);
}
private async void PropagateCompletion(IDataflowBlock source,
IDataflowBlock target, Func<bool> condition = null)
{
try { await source.Completion.ConfigureAwait(false); } catch { }
if (source.Completion.IsFaulted)
target.Fault(source.Completion.Exception.InnerException);
else
if (condition == null || condition()) target.Complete();
}
private async void PropagateFailure(IDataflowBlock source,
IDataflowBlock target)
{
try { await source.Completion.ConfigureAwait(false); } catch { }
if (source.Completion.IsFaulted)
target.Fault(source.Completion.Exception.InnerException);
}
}
Usage example:
var block = new DependencyTransformBlock<Item, string, Item>(item =>
{
DoWork(item);
return item;
},
keySelector: item => item.Name,
dependenciesSelector: item => item.DependsOn,
new ExecutionDataflowBlockOptions
{
MaxDegreeOfParallelism = Environment.ProcessorCount
},
keyComparer: StringComparer.OrdinalIgnoreCase);
//...
block.LinkTo(DataflowBlock.NullTarget<Item>());
In this example the block is linked to a NullTarget in order to discard its output, so that it becomes essentially an ActionBlock equivalent.
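Feeding the block and waiting for it to finish could look like this (a short sketch reusing the items array from the question):
foreach (var item in items)
    block.Post(item);
block.Complete();
await block.Completion;

// Items caught in a dependency cycle, or whose dependencies were
// never supplied, end up here instead of being processed:
foreach (var leftover in block.Unprocessed)
    Console.WriteLine($"Unprocessed: {leftover.Name}");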
In the following program, how do I propagate the OverflowException to Main?
class Program
{
static readonly CancellationTokenSource Source = new CancellationTokenSource();
static void Main(string[] args)
{
CallDoSomething(1);
Console.WriteLine("Press enter to exit");
Console.ReadLine();
}
static void CallDoSomething(int foo)
{
var task = Task.Factory.StartNew(() => DoSomething(foo), Source.Token);
task.ContinueWith(t =>
{
var result = task.Result;
Console.WriteLine(result);
Thread.Sleep(500);
CallDoSomething(result);
}, TaskContinuationOptions.NotOnCanceled);
task.ContinueWith(t => t.Exception?.Handle(ex => throw ex), TaskContinuationOptions.OnlyOnFaulted);
}
static int DoSomething(int foo)
{
if (foo > 100)
{
Source.Cancel();
throw new OverflowException("foo overflowed");
}
return foo * 2;
}
}
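One way to surface the exception, sketched under the assumption that the recursive continuation chain can be restructured with async/await (the CancellationTokenSource plumbing then becomes unnecessary, because the loop simply ends when the exception is thrown):
static void Main(string[] args)
{
    try
    {
        CallDoSomethingAsync(1).Wait(); // Wait() rethrows as AggregateException
    }
    catch (AggregateException ae) when (ae.InnerException is OverflowException oe)
    {
        Console.WriteLine(oe.Message); // "foo overflowed"
    }
    Console.WriteLine("Press enter to exit");
    Console.ReadLine();
}

// Hypothetical replacement for CallDoSomething: awaiting lets the
// OverflowException flow out of the task and up to Main.
static async Task CallDoSomethingAsync(int foo)
{
    while (true)
    {
        var result = await Task.Run(() => DoSomething(foo));
        Console.WriteLine(result);
        await Task.Delay(500);
        foo = result;
    }
}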
I have a multi-threaded application that has to perform 3 different categories of work items. Category A is the highest priority, Category B comes after A, and Category C comes after B. These work items are queued to the thread pool using Tasks. Let's say there are 10 Category C items already in the queue and then a Category B item is added. In this case, I would like the Category B item to be processed before any of the Category C items. Is there any way of accomplishing this?
You can implement this by creating your own queueing mechanism. This is just a code mockup.
Create an object like this
public class PrioritizableTask
{
public PrioritizableTask(Task task, int taskPriority)
{
Task = task;
Priority = taskPriority;
}
public int Priority { get; private set; }
public Task Task { get; private set; }
}
And then create another collection class and implement a new method on it, something like this.
public class PrioritizableTasksCollection : IList<PrioritizableTask>
{
private readonly List<PrioritizableTask> runners = new List<PrioritizableTask>();
public void Add(PrioritizableTask item)
{
runners.Add(item);
}
public Task GetNextPriority()
{
var priorityTask = runners.OrderBy(x => x.Priority).FirstOrDefault();
return priorityTask != null ? priorityTask.Task : null;
}
}
Consume like
PrioritizableTasksCollection executors = new PrioritizableTasksCollection();
executors.Add(new PrioritizableTask(new Task(() => { }), 4));
executors.Add(new PrioritizableTask(new Task(() => { }), 3));
executors.Add(new PrioritizableTask(new Task(() => { }), 7));
executors.Add(new PrioritizableTask(new Task(() => { }), 5));
executors.Add(new PrioritizableTask(new Task(() => { }), 1));
executors.Add(new PrioritizableTask(new Task(() => { }), 2));
Task executeNext = executors.GetNextPriority();
Implement your own removal logic on the collection.
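Note that the mockup above is not itself thread-safe, which matters here since work items arrive from multiple threads. A minimal sketch of the same two members guarded by one coarse lock (the _sync field and the removal inside GetNextPriority are additions of mine, so a returned task is not handed out twice):
private readonly List<PrioritizableTask> runners = new List<PrioritizableTask>();
private readonly object _sync = new object();

public void Add(PrioritizableTask item)
{
    lock (_sync) { runners.Add(item); }
}

public Task GetNextPriority()
{
    lock (_sync)
    {
        // The lowest Priority value is treated as most urgent, matching the OrderBy above
        var priorityTask = runners.OrderBy(x => x.Priority).FirstOrDefault();
        if (priorityTask != null)
            runners.Remove(priorityTask); // dequeue atomically under the same lock
        return priorityTask?.Task;
    }
}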
I've been looking at your problem and I did not find a built-in thread-safe sorted collection, so I built a basic thread-safe SortedSet<int> wrapper class.
Sorted Set
public class MyThreadSafeSortedSet
{
private SortedSet<int> _set = new SortedSet<int>(new MyComparer());
private readonly object _locker = new object();
public void Add(int value)
{
lock (_locker)
{
_set.Add(value);
}
}
public int? Take()
{
lock (_locker)
{
if (_set.Count == 0)
return null;
var item = _set.First();
_set.Remove(item);
return item;
}
}
}
I built a custom comparer which prefers even numbers
public class MyComparer : Comparer<int>
{
public override int Compare(int x, int y)
{
if (x % 2 == 0)
{
if (y % 2 == 0)
return x - y;
else
return -1;
}
else
{
if (y % 2 == 0)
return 1;
else
return x - y;
}
}
}
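For example, with this comparer the set { 3, 8, 5, 2 } enumerates as 2, 8, 3, 5: even numbers come first, and each group is in ascending order.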
And finally, two tasks: one to produce items, the other to take them.
static void Main(string[] args)
{
MyThreadSafeSortedSet queue = new MyThreadSafeSortedSet();
var task1 = Task.Run(() =>
{
Random r = new Random();
for (int i = 0; i < 15; i++)
{
Task.Delay(100).Wait();
var randomNumber = r.Next();
queue.Add(randomNumber);
}
Console.WriteLine("I'm done adding");
});
var task2 = Task.Run(() =>
{
Random r = new Random();
while (true)
{
var delay = r.Next(500);
Task.Delay(delay).Wait();
var item = queue.Take();
Console.WriteLine("Took: {0}", item);
if (item == null)
break;
}
});
Task.WaitAll(task1, task2); // wait for both the producer and the consumer
}
You can change the specialized SortedSet and custom comparer for your own classes.
Hope it helps.
Here is my version of a solution, based on the BinarySearch method of the List<T> class.
enum CategoryOfWorkItem: int { C = 0, B, A };
struct WorkItem : IComparer<WorkItem>
{
public CategoryOfWorkItem Category;
public int Compare(WorkItem x, WorkItem y)
{
return x.Category - y.Category;
}
public void AddTo(List<WorkItem> list)
{
int i = list.BinarySearch(this, this);
// BinarySearch returns the bitwise complement of the insertion point
// when the item is not found, so ~i recovers the correct index.
if (i < 0) i = ~i;
list.Insert(i, this);
}
}
Example of usage
List<WorkItem> list = new List<WorkItem>();
Task.Run(() =>
{
Random rand = new Random();
for (int i = 0; i < 20; i++)
{
WorkItem item = new WorkItem();
switch (rand.Next(0, 3))
{
case 0: item.Category = CategoryOfWorkItem.A; break;
case 1: item.Category = CategoryOfWorkItem.B; break;
case 2: item.Category = CategoryOfWorkItem.C; break;
}
lock (list)
{
item.AddTo(list);
}
Task.Delay(rand.Next(100, 1000)).Wait();
Console.WriteLine("Put {0}", item.Category);
}
Console.WriteLine("Putting finished.");
});
Task.WaitAll(Task.Run(() =>
{
Random rand = new Random();
while (true)
{
WorkItem item;
Task.Delay(rand.Next(500, 1000)).Wait();
lock (list)
{
if (list.Count < 1) break;
item = list[list.Count - 1]; // the comparer sorts C first and A last, so the highest category is at the end
list.RemoveAt(list.Count - 1);
}
Console.WriteLine("Get {0}", item.Category);
}
Console.WriteLine("Getting finished.");
}));
(I am currently restricted to .NET 4.0)
I have a situation where I want to process items in parallel as much as possible, order must be maintained, and items can be added at any time until "stop" is pressed.
Items can come in "bursts", so it is possible that the queue will completely drain, there will be a pause, and then a large number of items will come in at once again.
I want the results to become available as soon as they are done.
Here is a simplified example:
class Program
{
static void Main(string[] args)
{
BlockingCollection<int> itemsQueue = new BlockingCollection<int>();
Random random = new Random();
var results = itemsQueue
.GetConsumingEnumerable()
.AsParallel()
.AsOrdered()
.WithMergeOptions(ParallelMergeOptions.NotBuffered)
.Select(i =>
{
int work = 0;
Console.WriteLine("Working on " + i);
//simulate work
for (int busy = 0; busy <= 90000000; ++busy) { ++work; };
Console.WriteLine("Finished " + i);
return i;
});
TaskCompletionSource<bool> completion = new TaskCompletionSource<bool>();
Task.Factory.StartNew(() =>
{
foreach (int i in results)
{
Console.WriteLine("Result Available: " + i);
}
completion.SetResult(true);
});
int iterations;
iterations = random.Next(5, 50);
Console.WriteLine("------- iterations: " + iterations + "-------");
for (int i = 1; i <= iterations; ++i)
{
itemsQueue.Add(i);
}
while (true)
{
char c = Console.ReadKey().KeyChar;
if (c == 's')
{
break;
}
else
{
++iterations;
Console.WriteLine("adding: " + iterations);
itemsQueue.Add(iterations);
}
}
itemsQueue.CompleteAdding();
completion.Task.Wait();
Console.WriteLine("Done!");
Console.ReadKey();
itemsQueue.Dispose();
}
}
As the above example shows, what will typically happen is that results become available up until the last few (I'm not 100% sure of this, but the number of results it stops short by may be roughly correlated with the number of cores on the box), until itemsQueue.CompleteAdding() is called (in the example, when the "s" key is pressed), at which point the rest of the results finally become available.
Why do the results not become available immediately, despite the fact that I specify .WithMergeOptions(ParallelMergeOptions.NotBuffered), and how can I make them become available immediately?
Note that the problem is not an issue if you can call the BlockingCollection<T>.CompleteAdding() instance method - that will cause all results to finish.
Short Answer
If, on the other hand, you need to maintain order, need the results available as soon as they can be, and don't have an opportunity to call BlockingCollection<T>.CompleteAdding(), then if at all possible you are much better off making the consumption of items from the queue non-parallel, and parallelizing the processing of each individual item instead.
E.g.
class Program
{
//Not parallel, but suitable for monitoring queue purposes,
//can then focus on parallelizing each individual task
static void Main(string[] args)
{
BlockingCollection<int> itemsQueue = new BlockingCollection<int>();
Random random = new Random();
var results = itemsQueue.GetConsumingEnumerable()
.Select(i =>
{
Console.WriteLine("Working on " + i);
//Focus your parallelization efforts on the work of
//the individual task
//E.g, simulated:
double work = Enumerable.Range(0, 90000000 - (10 * (i % 3)))
.AsParallel()
.Select(w => w + 1)
.Average();
Console.WriteLine("Finished " + i);
return i;
});
TaskCompletionSource<bool> completion = new TaskCompletionSource<bool>();
Task.Factory.StartNew(() =>
{
foreach (int i in results)
{
Console.WriteLine("Result Available: " + i);
}
completion.SetResult(true);
});
int iterations;
iterations = random.Next(5, 50);
Console.WriteLine("------- iterations: " + iterations + "-------");
for (int i = 1; i <= iterations; ++i)
{
itemsQueue.Add(i);
}
while (true)
{
char c = Console.ReadKey().KeyChar;
if (c == 's')
{
break;
}
else
{
++iterations;
Console.WriteLine("adding: " + iterations);
itemsQueue.Add(iterations);
}
}
itemsQueue.CompleteAdding();
completion.Task.Wait();
Console.WriteLine("Done!");
Console.ReadKey();
itemsQueue.Dispose();
}
}
Longer Answer
It appears that there is an interaction between the BlockingCollection in particular and AsOrdered().
It seems that AsOrdered will stop the processing of tasks whenever one of the enumerators in the partition blocks.
The default partitioner deals with chunks typically greater than one - and the blocking collection will block until the chunk can be filled (or CompleteAdding is called).
However, even with a chunk size of 1, the problem does not completely go away.
To play around with this, you can observe the behavior by implementing your own partitioner. (Note that if you specify .WithDegreeOfParallelism(1), the problem of results waiting to appear goes away - but of course, a degree of parallelism of 1 rather defeats the purpose!)
e.g.
public class ImmediateOrderedPartitioner<T> : OrderablePartitioner<T>
{
private readonly IEnumerable<T> _consumingEnumerable;
private readonly Ordering _ordering = new Ordering();
public ImmediateOrderedPartitioner(BlockingCollection<T> collection) : base(true, true, true)
{
_consumingEnumerable = collection.GetConsumingEnumerable();
}
private class Ordering
{
public int Order = -1;
}
private class MyEnumerator<S> : IEnumerator<KeyValuePair<long, S>>
{
private readonly object _orderLock = new object();
private readonly IEnumerable<S> _enumerable;
private KeyValuePair<long, S> _current;
private bool _hasItem;
private Ordering _ordering;
public MyEnumerator(IEnumerable<S> consumingEnumerable, Ordering ordering)
{
_enumerable = consumingEnumerable;
_ordering = ordering;
}
public KeyValuePair<long, S> Current
{
get
{
if (_hasItem)
{
return _current;
}
else
throw new InvalidOperationException();
}
}
public void Dispose()
{
}
object System.Collections.IEnumerator.Current
{
get
{
return Current;
}
}
public bool MoveNext()
{
lock (_orderLock)
{
bool canMoveNext = false;
// Pull at most one item from the consuming enumerable; the predicate
// side effect records whether an item was actually produced.
var next = _enumerable.Take(1).FirstOrDefault(s => { canMoveNext = true; return true; });
if (canMoveNext)
{
_current = new KeyValuePair<long, S>(++_ordering.Order, next);
_hasItem = true;
}
else
{
_hasItem = false;
}
return canMoveNext;
}
}
public void Reset()
{
throw new NotSupportedException();
}
}
public override IList<IEnumerator<KeyValuePair<long, T>>> GetOrderablePartitions(int partitionCount)
{
var result = new List<IEnumerator<KeyValuePair<long,T>>>();
//for (int i = 0; i < partitionCount; ++i)
//{
// result.Add(new MyEnumerator<T>(_consumingEnumerable, _ordering));
//}
//share the enumerator between partitions in this case to maintain
//the proper locking on ordering.
var enumerator = new MyEnumerator<T>(_consumingEnumerable, _ordering);
for (int i = 0; i < partitionCount; ++i)
{
result.Add(enumerator);
}
return result;
}
public override bool SupportsDynamicPartitions
{
get
{
return false;
}
}
public override IEnumerable<T> GetDynamicPartitions()
{
throw new NotImplementedException();
}
public override IEnumerable<KeyValuePair<long, T>> GetOrderableDynamicPartitions()
{
throw new NotImplementedException();
}
public override IList<IEnumerator<T>> GetPartitions(int partitionCount)
{
throw new NotImplementedException();
}
}
class Program
{
static void Main(string[] args)
{
BlockingCollection<int> itemsQueue = new BlockingCollection<int>();
var partitioner = new ImmediateOrderedPartitioner<int>(itemsQueue);
Random random = new Random();
var results = partitioner
.AsParallel()
.AsOrdered()
.WithMergeOptions(ParallelMergeOptions.NotBuffered)
//.WithDegreeOfParallelism(1)
.Select(i =>
{
int work = 0;
Console.WriteLine("Working on " + i);
for (int busy = 0; busy <= 90000000; ++busy) { ++work; };
Console.WriteLine("Finished " + i);
return i;
});
TaskCompletionSource<bool> completion = new TaskCompletionSource<bool>();
Task.Factory.StartNew(() =>
{
foreach (int i in results)
{
Console.WriteLine("Result Available: " + i);
}
completion.SetResult(true);
});
int iterations;
iterations = 1; // random.Next(5, 50);
Console.WriteLine("------- iterations: " + iterations + "-------");
for (int i = 1; i <= iterations; ++i)
{
itemsQueue.Add(i);
}
while (true)
{
char c = Console.ReadKey().KeyChar;
if (c == 's')
{
break;
}
else
{
++iterations;
Console.WriteLine("adding: " + iterations);
itemsQueue.Add(iterations);
}
}
itemsQueue.CompleteAdding();
completion.Task.Wait();
Console.WriteLine("Done!");
Console.ReadKey();
itemsQueue.Dispose();
}
}
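As an aside (noting the question's .NET 4.0 restriction, so this is not directly usable there): .NET 4.5 added a built-in no-buffering partitioner that gives an effective chunk size of 1 without writing a custom one:
// Requires .NET 4.5+; EnumerablePartitionerOptions does not exist on 4.0.
var partitioner = Partitioner.Create(
    itemsQueue.GetConsumingEnumerable(),
    EnumerablePartitionerOptions.NoBuffering);

var results = partitioner
    .AsParallel()
    .AsOrdered()
    .WithMergeOptions(ParallelMergeOptions.NotBuffered)
    .Select(i => i); // work goes here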
Alternate Approach
If parallelizing the individual task (as recommended in the "short answer") is not a possibility, and all the other problem constraints apply, then you can implement your own kind of queue that spins up a task for each item - letting the Task Parallel Library handle the scheduling of work while you synchronize the consumption of results yourself.
For example, something like the below (with the standard "no warranties" disclaimer!)
public class QueuedItem<TInput, TResult>
{
private readonly object _lockObject = new object();
private TResult _result;
private readonly TInput _input;
private readonly TResult _notfinished;
internal readonly bool IsEndQueue = false;
internal QueuedItem()
{
IsEndQueue = true;
}
public QueuedItem(TInput input, TResult notfinished)
{
_input = input;
_notfinished = notfinished;
_result = _notfinished;
}
public TResult ReadResult()
{
lock (_lockObject)
{
if (!IsResultReady)
throw new InvalidOperationException("Check IsResultReady before calling ReadResult()");
return _result;
}
}
public void WriteResult(TResult value)
{
lock (_lockObject)
{
if (IsResultReady)
throw new InvalidOperationException("Result has already been written");
_result = value;
}
}
public TInput Input { get { return _input; } }
public bool IsResultReady
{
get
{
lock (_lockObject)
{
return !object.Equals(_result, _notfinished) || IsEndQueue;
}
}
}
}
public class ParallelImmediateOrderedProcessingQueue<TInput, TResult>
{
private readonly ReaderWriterLockSlim _addLock = new ReaderWriterLockSlim();
private readonly object _readingResultsLock = new object();
private readonly ConcurrentQueue<QueuedItem<TInput, TResult>> _concurrentQueue = new ConcurrentQueue<QueuedItem<TInput, TResult>>();
bool _isFinishedAdding = false;
private readonly TResult _notFinished;
private readonly Action<QueuedItem<TInput, TResult>> _processor;
/// <param name="notFinished">A value that indicates the result is not yet finished</param>
/// <param name="processor">Must call SetResult() on argument when finished.</param>
public ParallelImmediateOrderedProcessingQueue(TResult notFinished, Action<QueuedItem<TInput, TResult>> processor)
{
_notFinished = notFinished;
_processor = processor;
}
public event Action ResultsReady = delegate { };
private void SignalResult()
{
QueuedItem<TInput, TResult> item;
if (_concurrentQueue.TryPeek(out item) && item.IsResultReady)
{
ResultsReady();
}
}
public void Add(TInput input)
{
bool shouldThrow = false;
_addLock.EnterReadLock();
{
shouldThrow = _isFinishedAdding;
if (!shouldThrow)
{
var queuedItem = new QueuedItem<TInput, TResult>(input, _notFinished);
_concurrentQueue.Enqueue(queuedItem);
Task.Factory.StartNew(() => { _processor(queuedItem); SignalResult(); });
}
}
_addLock.ExitReadLock();
if (shouldThrow)
throw new InvalidOperationException("An attempt was made to add an item, but adding items was marked as completed");
}
public IEnumerable<TResult> ConsumeReadyResults()
{
//lock necessary to preserve ordering
lock (_readingResultsLock)
{
QueuedItem<TInput, TResult> queuedItem;
while (_concurrentQueue.TryPeek(out queuedItem) && queuedItem.IsResultReady)
{
if (!_concurrentQueue.TryDequeue(out queuedItem))
throw new ApplicationException("this shouldn't happen");
if (queuedItem.IsEndQueue)
{
_completion.SetResult(true);
}
else
{
yield return queuedItem.ReadResult();
}
}
}
}
public void CompleteAddingItems()
{
_addLock.EnterWriteLock();
{
_isFinishedAdding = true;
var queueCompletion = new QueuedItem<TInput, TResult>();
_concurrentQueue.Enqueue(queueCompletion);
Task.Factory.StartNew(() => { SignalResult(); });
}
_addLock.ExitWriteLock();
}
TaskCompletionSource<bool> _completion = new TaskCompletionSource<bool>();
public void WaitForCompletion()
{
_completion.Task.Wait();
}
}
class Program
{
static void Main(string[] args)
{
const int notFinished = int.MinValue;
var processingQueue = new ParallelImmediateOrderedProcessingQueue<int, int>(notFinished, qi =>
{
int work = 0;
Console.WriteLine("Working on " + qi.Input);
//simulate work
int maxBusy = 90000000 - (10 * (qi.Input % 3));
for (int busy = 0; busy <= maxBusy; ++busy) { ++work; };
Console.WriteLine("Finished " + qi.Input);
qi.WriteResult(qi.Input);
});
processingQueue.ResultsReady += new Action(() =>
{
Task.Factory.StartNew(() =>
{
foreach (int result in processingQueue.ConsumeReadyResults())
{
Console.WriteLine("Results Available: " + result);
}
});
});
int iterations = new Random().Next(5, 50);
Console.WriteLine("------- iterations: " + iterations + "-------");
for (int i = 1; i <= iterations; ++i)
{
processingQueue.Add(i);
}
while (true)
{
char c = Console.ReadKey().KeyChar;
if (c == 's')
{
break;
}
else
{
++iterations;
Console.WriteLine("adding: " + iterations);
processingQueue.Add(iterations);
}
}
processingQueue.CompleteAddingItems();
processingQueue.WaitForCompletion();
Console.WriteLine("Done!");
Console.ReadKey();
}
}