This is for a Unity project: a procedural terrain generation system that runs in generations. The output of each generation becomes the input of the next, so they must run sequentially.
They are also slow enough that they need to run on another thread.
The code below works by generating a "sector" and storing it in a dictionary with its "generation" number as the key. It also stores a small initialization object with just the data required to initialize a given generation, so that I can destroy sectors to save memory and re-instantiate them backwards down the chain.
Increment() finds the highest key and generates a new sector with the previous one as its input.
Task.Run() does the generation work without blocking the rest of the game. The problem is that it's possible to request a new sector before the previous one has finished generating.
What's the best pattern to prevent generation 3 being generated before generation 2 is finished?
public class WorldGeneratorAsync : MonoBehaviour
{
public Dictionary<int, SectorController> SectorContollerDict = new Dictionary<int, SectorController>();
public Dictionary<int, TerrainGraphInput> GraphInputDict = new Dictionary<int, TerrainGraphInput>();
public int globalSeed;
public TerrainGraph terrainGraph;
async void Initialise()
{
DestroyAllSectors();
await InstantiateSectorAsync(0);
}
async void Increment()
{
var highestGeneration = SectorContollerDict.Keys.Max();
await InstantiateSectorAsync(highestGeneration+1);
}
async Task InstantiateSectorAsync(int generation)
{
if (generation == 0) // if we are first generation, init with dummy data
{
var inputData = new TerrainGraphInput(globalSeed, generation); // dummy data
var outputData = await Task.Run(() =>terrainGraph.GetGraphOutput(inputData)); // slow function
lock (SectorContollerDict)
{
SectorContollerDict[generation] = SectorController.New(outputData);
}
lock (GraphInputDict)
{
GraphInputDict[generation] = inputData;
}
}
else // we take the init data from the previous generation
{
int adder = generation > 0 ? -1 : 1;
TerrainGraphInput inputData;
if (GraphInputDict.Keys.Contains(generation))
{
inputData = GraphInputDict[generation];
}
else if (SectorContollerDict.Keys.Contains(generation + adder))
{
var previousSectorController = SectorContollerDict[generation + adder];
inputData = new TerrainGraphInput(
previousSectorController.sectorData,
previousSectorController.sectorData.EndSeeds,
generation,
globalSeed
);
}
else
{
throw new NoValidInputException();
}
var outputData = await Task.Run(()=>terrainGraph.GetGraphOutput(inputData)); // slow function
lock (SectorContollerDict)
{
SectorContollerDict[generation] = SectorController.New(outputData);
}
lock (GraphInputDict)
{
GraphInputDict[generation] = inputData;
}
}
}
private void DestroyAllSectors()
{
SectorContollerDict = new Dictionary<int, SectorController>();
GraphInputDict = new Dictionary<int, TerrainGraphInput>();
foreach (var sc in GameObject.FindObjectsOfType<SectorController>())
{
sc.DestroyMe();
}
}
}
Thanks to Orace - their idea worked. Simpler than I expected: just switch the dictionary of SectorControllers to a dictionary of tasks, and await the previous generation inside the instantiation function.
public class WorldGeneratorAsync : MonoBehaviour
{
public Dictionary<int, Task<SectorController>> TaskDict = new();
// public Dictionary<int, SectorController> SectorContollerDict = new();
public Dictionary<int, TerrainGraphInput> GraphInputDict = new();
public int globalSeed;
public TerrainGraph terrainGraph;
async void Initialise()
{
DestroyAllSectors();
TaskDict[0] = InstantiateSectorAsync(0);
}
async void Increment()
{
var highestGeneration = TaskDict.Keys.Max();
int newGeneration = highestGeneration + 1;
TaskDict[newGeneration] = InstantiateSectorAsync(newGeneration);
}
async Task<SectorController> InstantiateSectorAsync(int generation)
{
SectorController sc;
if (generation == 0) // if we are first generation, init with dummy data
{
var inputData = new TerrainGraphInput(globalSeed, generation); // dummy data
var outputData = await Task.Run(() =>terrainGraph.GetGraphOutput(inputData)); // slow function
sc = SectorController.New(outputData);
GraphInputDict[generation] = inputData;
}
else
{
int adder = generation > 0 ? -1 : 1;
TerrainGraphInput inputData;
if (GraphInputDict.Keys.Contains(generation))
{
inputData = GraphInputDict[generation];
}
else if (TaskDict.Keys.Contains(generation + adder))
{
// var previousSectorController = SectorContollerDict[generation + adder];
var previousSectorController = await TaskDict[generation + adder]; // await previous generation
inputData = new TerrainGraphInput(
previousSectorController.sectorData,
previousSectorController.sectorData.EndSeeds,
generation,
globalSeed
);
}
else
{
throw new NoValidInputException();
}
var outputData = await Task.Run(()=>terrainGraph.GetGraphOutput(inputData)); // slow function
sc = SectorController.New(outputData);
GraphInputDict[generation] = inputData;
}
return sc;
}
private void DestroyAllSectors()
{
GraphInputDict = new Dictionary<int, TerrainGraphInput>();
TaskDict = new();
foreach (var sc in GameObject.FindObjectsOfType<SectorController>())
{
sc.DestroyMe();
}
}
}
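For anyone hitting the same thing, here is a minimal standalone sketch of the same "dictionary of tasks" chaining idea outside Unity. BuildSectorAsync and the int payload are just placeholders, not the real project types.
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
class ChainDemo
{
    static readonly Dictionary<int, Task<int>> Chain = new();
    static Task<int> BuildSectorAsync(int generation)
    {
        // Capture the previous generation's task on the caller's thread,
        // so the dictionary is only ever touched from one thread.
        Task<int> previous = generation == 0 ? null : Chain[generation - 1];
        return Task.Run(async () =>
        {
            int prevOutput = previous == null ? 0 : await previous; // serialises the chain
            await Task.Delay(100);                                  // stand-in for the slow graph evaluation
            return prevOutput + 1;                                  // stand-in for the sector output
        });
    }
    static async Task Main()
    {
        for (int g = 0; g < 5; g++)
            Chain[g] = BuildSectorAsync(g);   // requests can arrive back to back
        Console.WriteLine(await Chain[4]);    // prints 5: generations ran strictly in order
    }
}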
Related
I have the following code that does what I want, but I had to resort to using .GetAwaiter().GetResult() in the middle of asynchronous code to get there. I am wondering if there is an elegant way to achieve this without resorting to such hacks.
This is a simplified version of the code I have.
public async Task<string[]> GetValues(int[] keys)
{
List<int> keysNotYetActivelyRequested = null;
// don't start the task at this point because the
// keysNotYetActivelyRequested is not yet populated
var taskToCreateWithoutStarting = new Task<Dictionary<int, string>>(
() => GetValuesFromApi(keysNotYetActivelyRequested.ToArray())
.GetAwaiter().GetResult() /*not the best idea*/);
(var allTasksToAwait, keysNotYetActivelyRequested) = GetAllTasksToAwait(
keys, taskToCreateWithoutStarting);
if (keysNotYetActivelyRequested.Any())
{
// keysNotYetActivelyRequested will be empty when all keys
// are already part of another active request
taskToCreateWithoutStarting.Start(TaskScheduler.Current);
}
var allResults = await Task.WhenAll(allTasksToAwait);
var theReturn = new string[keys.Length];
for (int i = 0; i < keys.Length; i++)
{
foreach (var result in allResults)
{
if (result.TryGetValue(keys[i], out var value))
{
theReturn[i] = value;
}
}
}
if (keysNotYetActivelyRequested.Any())
{
taskToCreateWithoutStarting.Dispose();
}
return theReturn;
}
// all active requests indexed by the key, used to avoid generating
// multiple requests for the same key
private Dictionary<int, Task<Dictionary<int, string>>> _activeRequests = new();
private (HashSet<Task<Dictionary<int, string>>> allTasksToAwait,
List<int> keysNotYetActivelyRequested) GetAllTasksToAwait(
int[] keys, Task<Dictionary<int, string>> taskToCreateWithoutStarting)
{
var keysNotYetActivelyRequested = new List<int>();
// a HashSet because each task will have multiple keys hence _activeRequests
// will have the same task multiple times
var allTasksToAwait = new HashSet<Task<Dictionary<int, string>>>();
// add cleanup to the task to remove the requested keys from _activeRequests
// once it completes
var taskWithCleanup = taskToCreateWithoutStarting.ContinueWith(_ =>
{
lock (_activeRequests)
{
foreach (var key in keysNotYetActivelyRequested)
{
_activeRequests.Remove(key);
}
}
});
lock (_activeRequests)
{
foreach (var key in keys)
{
// use CollectionsMarshal to avoid a lookup for the same key twice
ref var refToTask = ref CollectionsMarshal.GetValueRefOrAddDefault(
_activeRequests, key, out var exists);
if (exists)
{
allTasksToAwait.Add(refToTask);
}
else
{
refToTask = taskToCreateWithoutStarting;
allTasksToAwait.Add(taskToCreateWithoutStarting);
keysNotYetActivelyRequested.Add(key);
}
}
}
return (allTasksToAwait, keysNotYetActivelyRequested);
}
// not the actual code
private async Task<Dictionary<int, string>> GetValuesFromApi(int[] keys)
{
// request duration dependent on the number of keys
await Task.Delay(keys.Length);
return keys.ToDictionary(k => k, k => k.ToString());
}
And a test method:
[Test]
public void TestGetValues()
{
var random = new Random();
var allTasks = new Task[10];
for (int i = 0; i < 10; i++)
{
var arrayofRandomInts = Enumerable.Repeat(random, random.Next(1, 100))
.Select(r => r.Next(1, 100)).ToArray();
allTasks[i] = GetValues(arrayofRandomInts);
}
Assert.DoesNotThrowAsync(() => Task.WhenAll(allTasks));
Assert.That(_activeRequests.Count, Is.EqualTo(0));
}
Instead of:
Task<Something> coldTask = new(() => GetAsync().GetAwaiter().GetResult());
You can do it like this:
Task<Task<Something>> coldTaskTask = new(() => GetAsync());
Task<Something> proxyTask = coldTaskTask.Unwrap();
The nested task coldTaskTask is the task that you will later Start (or RunSynchronously).
The unwrapped task proxyTask is a proxy that represents both the invocation of the GetAsync method, as well as the completion of the Task<Something> that this method generates.
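As a rough usage sketch (GetSomethingAsync here is just a stand-in method, not part of the question):
async Task<string> RunDeferredAsync()
{
    // Cold outer task: nothing runs until Start is called.
    Task<Task<string>> coldTaskTask = new(() => GetSomethingAsync());
    Task<string> proxyTask = coldTaskTask.Unwrap();
    // ... gather whatever inputs the delegate needs, then:
    coldTaskTask.Start(TaskScheduler.Default);
    // Completes only when GetSomethingAsync's own task completes.
    return await proxyTask;
}
async Task<string> GetSomethingAsync()
{
    await Task.Delay(10);   // stand-in for real asynchronous work
    return "done";
}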
You should never use the task constructor.
If you want to refer to some code to execute later, use a delegate. Just like you would with synchronous code. The delegate types for asynchronous code are slightly different, but they're still just delegates.
Func<Task<Dictionary<int, string>>> getValuesAsync = () => GetValuesFromApi(keysNotYetActivelyRequested.ToArray());
...
var result = await getValuesAsync();
Also, I strongly recommend replacing ContinueWith with await.
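For example, the cleanup that the question attaches with ContinueWith could be folded into the delegate itself using try/finally; this is only a sketch, and RunRequestAsync is an assumed name rather than part of the original code:
// Sketch only: replaces the cold task + ContinueWith cleanup from the question.
private async Task<Dictionary<int, string>> RunRequestAsync(List<int> keysNotYetActivelyRequested)
{
    try
    {
        return await GetValuesFromApi(keysNotYetActivelyRequested.ToArray());
    }
    finally
    {
        // Same cleanup as the ContinueWith continuation, expressed with await/finally.
        lock (_activeRequests)
        {
            foreach (var key in keysNotYetActivelyRequested)
            {
                _activeRequests.Remove(key);
            }
        }
    }
}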
All links are to my blog.
Objective:
Using the injector code below, I'm trying to inject the stopwatch methods (which live in the stopwatch DLL) at the desired locations in the target DLL, in order to measure the time taken by each method in the target DLL. A method may or may not be void, and it may have multiple return statements.
Target dll
public class targetDll
{
void func1(){
//Inject Stopwatch_start(); method here
int a = 3;
int b = 4;
int temp;
temp = a;
a = b;
b =temp;
if (a + b > 2)
{
Console.WriteLine("function____1");
}
//Inject Stopwatch_stop(); method here
}
String func2(){
//Inject Stopwatch_start(); method here
int a = 3;
int b = 4;
int c = 5;
int temp;
temp = a;
a = b;
b = c;
c = temp;
if (a + b > 5)
{
Console.WriteLine("function____2");
//inject Stopwatch_stop() method here
return "";
}
a = temp;
//inject Stopwatch_stop(); method here
return "";
}
}
Source dll(stopwatch dll)
public static class stopwatch_class
{
static System.Diagnostics.Stopwatch stopwatch_obj = new System.Diagnostics.Stopwatch();
public static void stopwatch_start()
{
stopwatch_obj.Start();
}
public static void stopwatch_stop()
{
stopwatch_obj.Stop();
Console.WriteLine(stopwatch_obj.ElapsedMilliseconds);
}
}
Injector code
class Trial_injector
{
static void Main(string[] args)
{
var start_method = (dynamic)null;
var stop_method = (dynamic)null;
AssemblyDefinition target_assembly = AssemblyDefinition.ReadAssembly("targetDll.dll",
new ReaderParameters { ReadWrite = true });
var target_modules = target_assembly.MainModule;
TypeDefinition[] target_module = target_modules.Types.ToArray();
AssemblyDefinition source_assembly = AssemblyDefinition.ReadAssembly("stopwatch.dll", new
ReaderParameters { ReadWrite = true });
var source_modules = source_assembly.MainModule;
TypeDefinition[] source_module = source_modules.Types.ToArray();
foreach (var type in source_module)
{
foreach (var method in type.Methods)
{
if (method.Name == "stopwatch_start")
{
start_method = method;
}
if (method.Name == "stopwatch_stop")
{
stop_method = method;
}
}
}
foreach(var module_ in target_module)
{
foreach(var method_ in module_.Methods)
{
String stg="hello_world";
var processor2 = method_.Body.GetILProcessor();
var first_instruction = method_.Body.Instructions.First();
var last_instruction = method_.Body.Instructions.Last();
var ldstr = processor2.Create(OpCodes.Ldstr, stg);
var call = processor2.Create(OpCodes.Call, method_.Module.Import(start_method));
var call2 = processor2.Create(OpCodes.Call, method_.Module.Import(stop_method));
processor2.InsertBefore(first_instruction, ldstr);
processor2.InsertAfter(first_instruction, call);
processor2.InsertBefore(last_instruction, ldstr);
processor2.InsertBefore(last_instruction, call2);
}
}
target_assembly.Write();
}
}
You were almost there with your code; only a few modifications were needed.
I'm not sure why you need the ldstr opcode, as it isn't used anywhere. For the start call you want it inserted before the first instruction, not after. As for the last instruction, you can go with InsertBefore as well. So the final code could look like this:
foreach (var module_ in target_module)
{
foreach (var method_ in module_.Methods)
{
var processor2 = method_.Body.GetILProcessor();
var first_instruction = method_.Body.Instructions.First();
var last_instruction = method_.Body.Instructions.Last();
var call = processor2.Create(OpCodes.Call, method_.Module.Import(start_method));
var call2 = processor2.Create(OpCodes.Call, method_.Module.Import(stop_method));
processor2.InsertBefore(first_instruction, call);
processor2.InsertBefore(last_instruction, call2);
}
}
However, that wouldn't work with some early returns. Why? Early returns are compiled as a br or br_s opcode that jumps to the ret at the end of the method, so if we inject our call right before the ret, those early returns will skip it. In your example this doesn't matter, because the code is compiled as if-else and both branches end up in the right place. But imagine we have code like this:
int a = 3;
if (a == 3)
{
return; // very early return here
}
// the rest as in original one
we won't see the elapsed time printed for this method, because the early return jumps past our injected call. What we need to do is find all branch instructions responsible for early returns (those that jump to the ret opcode) and retarget them at our call. We can do this in the following way:
foreach (var bodyInstruction in method_.Body.Instructions)
{
if (bodyInstruction.OpCode != OpCodes.Br && bodyInstruction.OpCode != OpCodes.Br_S) continue;
if (((Instruction)bodyInstruction.Operand).OpCode != OpCodes.Ret) continue;
bodyInstruction.Operand = call2;
}
So here we scan through all the instructions, and whenever we find a br or br_s that jumps to a ret, we update it to jump to our call instead. Voilà.
Note: I used Elapsed instead of ElapsedMilliseconds, as the latter was showing all zeros.
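With that change, the stop method from the question looks roughly like this (the Reset call is my own addition, so treat it as a suggestion rather than part of the original):
public static void stopwatch_stop()
{
    stopwatch_obj.Stop();
    Console.WriteLine(stopwatch_obj.Elapsed);   // Elapsed instead of ElapsedMilliseconds
    stopwatch_obj.Reset();                      // suggestion: reset so timings don't accumulate across methods
}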
Full code:
var start_method = (dynamic) null;
var stop_method = (dynamic) null;
AssemblyDefinition target_assembly = AssemblyDefinition.ReadAssembly("target.exe", new ReaderParameters {ReadWrite = true});
var target_modules = target_assembly.MainModule;
TypeDefinition[] target_module = target_modules.Types.ToArray();
AssemblyDefinition source_assembly = AssemblyDefinition.ReadAssembly("stopwatch.dll", new ReaderParameters {ReadWrite = true});
var source_modules = source_assembly.MainModule;
TypeDefinition[] source_module = source_modules.Types.ToArray();
foreach (var type in source_module)
{
foreach (var method in type.Methods)
{
if (method.Name == "stopwatch_start")
{
start_method = method;
}
if (method.Name == "stopwatch_stop")
{
stop_method = method;
}
}
}
foreach (var module_ in target_module)
{
foreach (var method_ in module_.Methods)
{
var processor2 = method_.Body.GetILProcessor();
var first_instruction = method_.Body.Instructions.First();
var last_instruction = method_.Body.Instructions.Last();
var call = processor2.Create(OpCodes.Call, method_.Module.Import(start_method));
var call2 = processor2.Create(OpCodes.Call, method_.Module.Import(stop_method));
processor2.InsertBefore(first_instruction, call);
processor2.InsertBefore(last_instruction, call2);
foreach (var bodyInstruction in method_.Body.Instructions)
{
if (bodyInstruction.OpCode != OpCodes.Br && bodyInstruction.OpCode != OpCodes.Br_S) continue;
if (((Instruction)bodyInstruction.Operand).OpCode != OpCodes.Ret) continue;
bodyInstruction.Operand = call2;
}
}
}
target_assembly.Write();
self-promotion on
I happen to have recorded two videos about doing this (in a slightly different way) with Mono.Cecil. You can find them as Writing simple .NET execution tracer with Mono.Cecil and Instrumenting .NET assemblies to measure method's execution time with Mono.Cecil.
self-promotion off
I've been having trouble running multiple tasks with heavy operations.
It seems as if the tasks are being killed before all the operations are complete.
The code here is example code I used to replicate the issue. If I add something like Debug.Write(), the added wait from writing makes the issue go away. The issue is also gone if I test on a smaller sample size. The reason there is a class in the example below is to create complexity for the test.
The real case where I encountered the issue first is too complicated to explain for a post here.
public static class StaticRandom
{
static int seed = Environment.TickCount;
static readonly ThreadLocal<Random> random =
new ThreadLocal<Random>(() => new Random(Interlocked.Increment(ref seed)));
public static int Next()
{
return random.Value.Next();
}
public static int Next(int maxValue)
{
return random.Value.Next(maxValue);
}
public static double NextDouble()
{
return random.Value.NextDouble();
}
}
// this is the test function I run to recreate the problem:
static void tasktest()
{
var testlist = new List<ExampleClass>();
for (var index = 0; index < 10000; ++index)
{
var newClass = new ExampleClass();
newClass.Populate(Enumerable.Range(0, 1000).ToList());
testlist.Add(newClass);
}
var anotherClassList = new List<ExampleClass>();
var threadNumber = 5;
if (threadNumber > testlist.Count)
{
threadNumber = testlist.Count;
}
var taskList = new List<Task>();
var tokenSource = new CancellationTokenSource();
CancellationToken cancellationToken = tokenSource.Token;
int stuffPerThread = testlist.Count / threadNumber;
var stuffCounter = 0;
for (var count = 1; count <= threadNumber; ++count)
{
var toSkip = stuffCounter;
var threadWorkLoad = stuffPerThread;
var currentIndex = count;
// these ifs make sure all the indexes are covered
if (stuffCounter + threadWorkLoad > testlist.Count)
{
threadWorkLoad = testlist.Count - stuffCounter;
}
else if (count == threadNumber && stuffCounter + threadWorkLoad < testlist.Count)
{
threadWorkLoad = testlist.Count - stuffCounter;
}
taskList.Add(Task.Factory.StartNew(() => taskfunc(testlist, anotherClassList, toSkip, threadWorkLoad),
cancellationToken, TaskCreationOptions.None, TaskScheduler.Default));
stuffCounter += stuffPerThread;
}
Task.WaitAll(taskList.ToArray());
}
public class ExampleClass
{
public ExampleClassInner[] Inners { get; set; }
public ExampleClass()
{
Inners = new ExampleClassInner[5];
for (var index = 0; index < Inners.Length; ++index)
{
Inners[index] = new ExampleClassInner();
}
}
public void Populate(List<int> intlist) {/*adds random ints to the inner class*/}
public ExampleClass(ExampleClass copyFrom)
{
Inners = new ExampleClassInner[5];
for (var index = 0; index < Inners.Length; ++index)
{
Inners[index] = new ExampleClassInner(copyFrom.Inners[index]);
}
}
public class ExampleClassInner
{
public bool SomeBool { get; set; } = false;
public int SomeInt { get; set; } = -1;
public ExampleClassInner()
{
}
public ExampleClassInner(ExampleClassInner copyFrom)
{
SomeBool = copyFrom.SomeBool;
SomeInt = copyFrom.SomeInt;
}
}
}
static int expensivefunc(int theint)
{
/*a lot of pointless arithmetic and loops done only on primitives and with primitives,
just to increase the complexity*/
theint *= theint + 1;
var anotherlist = Enumerable.Range(0, 10000).ToList();
for (var index = 0; index < anotherlist.Count; ++index)
{
theint += index;
if (theint % 5 == 0)
{
theint *= index / 2;
}
}
var yetanotherlist = Enumerable.Range(0, 50000).ToList();
for (var index = 0; index < yetanotherlist.Count; ++index)
{
theint += index;
if (theint % 7 == 0)
{
theint -= index / 3;
}
}
while (theint > 8)
{
theint /= 2;
}
return theint;
}
// this function is intentionally creating a lot of objects, to simulate complexity
static void taskfunc(List<ExampleClass> intlist, List<ExampleClass> anotherClassList, int skip, int take)
{
if (take == 0)
{
take = intlist.Count;
}
var partial = intlist.Skip(skip).Take(take).ToList();
for (var index = 0; index < partial.Count; ++index)
{
var testint = expensivefunc(index);
var newClass = new ExampleClass(partial[index]);
newClass.Inners[StaticRandom.Next(5)].SomeInt = testint;
anotherClassList.Add(new ExampleClass(newClass));
}
}
The expected result is that anotherClassList ends up the same size as testlist, and this is what happens when the lists are smaller or the task operations are less complex. However, when I increase the volume of operations, anotherClassList has a few entries missing, and sometimes some of the entries in the list are null.
Why does this happen when I have Task.WaitAll?
Your problem is that it's just not thread-safe; you can't add to a List<T> from multiple threads and expect it to play nice.
One way is to use a lock or a thread-safe collection, but I feel this all should be refactored (my OCD is going off all over the place).
private static object _sync = new object();
...
private static void TaskFunc(List<ExampleClass> intlist, List<ExampleClass> anotherClassList, int skip, int take)
{
...
var partial = intlist.Skip(skip).Take(take).ToList();
...
// note that locking here will likely drastically decrease any performance threading gain
lock (_sync)
{
for (var index = 0; index < partial.Count; ++index)
{
// this is your problem, you are adding to a list from multiple threads
anotherClassList.Add(...);
}
}
}
In short, I think you need to think more carefully about the threading logic of your method: identify what you are trying to achieve and how to make it conceptually thread-safe (while keeping your performance gains).
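If you'd rather avoid the explicit lock, the thread-safe-collection route mentioned above could look something like this minimal sketch (it reuses the names from the question; ConcurrentBag is unordered, which is fine here since the original list order wasn't meaningful):
// requires System.Collections.Concurrent
static void taskfunc(List<ExampleClass> intlist, ConcurrentBag<ExampleClass> anotherClassBag, int skip, int take)
{
    var partial = intlist.Skip(skip).Take(take).ToList();
    for (var index = 0; index < partial.Count; ++index)
    {
        var testint = expensivefunc(index);
        var newClass = new ExampleClass(partial[index]);
        newClass.Inners[StaticRandom.Next(5)].SomeInt = testint;
        anotherClassBag.Add(new ExampleClass(newClass));   // ConcurrentBag.Add is safe from multiple threads
    }
}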
After TheGeneral pointed out that List<T> is not thread-safe, I changed the list I was adding to from inside the tasks to an array, and this fixed my issue.
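Conceptually it amounts to giving each task its own range of indexes in a pre-sized array, so no two threads ever write the same slot. A sketch based on the code above, not my exact fix:
var results = new ExampleClass[testlist.Count];   // sized up front, shared by all tasks
...
static void taskfunc(List<ExampleClass> intlist, ExampleClass[] results, int skip, int take)
{
    for (var index = skip; index < skip + take && index < intlist.Count; ++index)
    {
        var newClass = new ExampleClass(intlist[index]);
        newClass.Inners[StaticRandom.Next(5)].SomeInt = expensivefunc(index);
        results[index] = newClass;   // each index is written by exactly one task
    }
}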
I am using a RESTful API that returns a maximum of 50 records per call; if you need more than that, you must make multiple calls and pass an offset.
There are times when we need 'all' of the results loaded. We are using something that resembles the code below: it makes one request after another and adds the results to a list, stopping when either the maximum is reached or any one call returns fewer items than requested.
How can I refactor this (using tasks/parallelism/threads) to load the data with multiple requests in flight at any one time and still get exactly the same results? I have looked at creating multiple Tasks and awaiting them, but the problem is that the number of records to load is unknown until we either run out of data or hit the maximum.
public IEnumerable<T> GetItems(int maxAmount = -1)
{
var moreData = true;
var result = new List<T>();
var counter = 0;
var batchAmount = 50;
while(moreData)
{
var requestAmount = Math.Min(batchAmount,maxAmount-result.Count);
var items = GetItemsFromService<T>(requestAmount,counter);
counter += items.Count;
moreData = items.Count == requestAmount && (maxAmount == -1 || maxAmount> items.Count);
result.AddRange(items);
}
return result;
}
private IEnumerable<T> GetItemsFromService(int batchAmount,int offset)
{
//Lets assume that this gets data from a rest service that returns a maximum of batchAmount
//and offsets using the offset variable.
}
Unfortunately you can't use async here as you are relying on the number of items from the previous request. This must be synchronous unless you want to do some asynchronous operations on the data that you've received.
It must be a badly designed API that returns paged results without a total page count or a total number of items.
I managed to get this working. Basically, I keep sending paged requests until one of them comes back with nothing; since they are started in order, once a response comes back empty we do not need to make any more requests, just allow the existing ones to finish.
My working code looks like this.
private IEnumerable<object> GetEntitiesInParallel(Type type, string apiPath, Dictionary<string, string> parameters, int startPosition, int maxAmount)
{
var context = new TaskThreadingContext(maxAmount, startPosition);
var threads = Enumerable.Range(0, NumberOfThreads).Select(i =>
{
var task = Task.Factory.StartNew(() =>
{
while (context.Continue)
{
var rawData = String.Empty;
var offset = context.NextAmount();
var result = GetEntitiesSingleRequest(type, parameters, offset, apiPath, out rawData);
if (result.Any())
{
context.AddResult(result.Cast<object>(), rawData);
}
else
{
context.NoResult();
}
}
});
return task;
}).ToArray();
Task.WaitAll(threads);
var results = context.GetResults<object>();
return results;
}
private IEnumerable<object> GetEntitiesSingleRequest(Type type,Dictionary<string,string> parameters,
int offset,string apiPath, out string rawData)
{
var request = Utility.CreateRestRequest(apiPath, Method.GET,ApiKey,50,offset,parameters);
type = typeof(List<>).MakeGenericType(type);
var method = Client.GetType().GetMethods().Single(m => m.IsGenericMethod && m.Name == "Execute").MakeGenericMethod(type);
try
{
dynamic response = (IRestResponse)method.Invoke(Client, new object[] { request });
var data = response.Data as IEnumerable;
var dataList = data.Cast<object>().ToList();
rawData = response.Content.Replace("\n", Environment.NewLine);
return dataList.OfType<object>().ToList();
}
catch (Exception ex)
{
if (ex.Message.IndexOf("404") != -1)
{
rawData = null;
return Enumerable.Empty<object>();
}
throw;
}
}
private class TaskThreadingContext
{
private int batchAmount = 50;
private object locker1 = new object();
private object locker2 = new object();
private CancellationTokenSource tokenSource;
private CancellationToken token;
private volatile bool cont = true;
private volatile int offset = 0;
private volatile int max = 0;
private volatile int start = 0;
private List<object> result = new List<object>();
private List<string> raw = new List<string>();
public bool Continue { get { return cont; } }
public TaskThreadingContext(int maxRows = 0,int startPosition = 0)
{
max = maxRows;
offset = start = startPosition;
}
public int NextAmount()
{
lock(locker1)
{
var ret = offset;
var temp = offset + batchAmount;
if (temp - start > max && max > 0)
{
temp = max - offset;
}
offset = temp;
if (offset - start >= max && max > 0)
{
cont = false;
}
return ret;
}
}
public TaskThreadingContext()
{
tokenSource = new CancellationTokenSource();
token = tokenSource.Token;
}
public void AddResult(IEnumerable<object> items,string rawData)
{
lock(locker2)
{
result.AddRange(items);
raw.Add(rawData);
}
}
public IEnumerable<T> GetResults<T>()
{
return this.result.Cast<T>().ToList();
}
public void NoResult()
{
cont = false;
}
}
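For anyone who just wants the shape of it, here is a condensed sketch of the same idea with the plumbing stripped out. GetPage is a hypothetical stand-in for GetEntitiesSingleRequest (a synchronous call returning a List<object>), so treat the details as assumptions: several workers keep claiming the next offset until a page comes back empty.
// requires System.Collections.Concurrent, System.Linq, System.Threading, System.Threading.Tasks
private async Task<List<object>> GetAllPagesAsync(int pageSize, int workerCount)
{
    var results = new ConcurrentBag<object>();
    int nextOffset = -pageSize;
    bool finished = false;
    var workers = Enumerable.Range(0, workerCount).Select(_ => Task.Run(() =>
    {
        while (!Volatile.Read(ref finished))
        {
            int offset = Interlocked.Add(ref nextOffset, pageSize);   // claim the next page
            var page = GetPage(pageSize, offset);                     // synchronous REST call, as above
            if (page.Count == 0)
            {
                Volatile.Write(ref finished, true);                   // no more data: tell the other workers
                return;
            }
            foreach (var item in page)
                results.Add(item);
        }
    })).ToArray();
    await Task.WhenAll(workers);
    return results.ToList();
}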
I have a restartable program that runs over a very large space and I have started parallelizing it some. Each Task runs independently and updates a database with its results. It doesn't matter if tasks are repeated (they are fully deterministic based on the input array and will simply generate the same result they did before), but doing so is relatively inefficient. So far I have come up with the following pattern:
static void Main(string[] args) {
GeneratorStart = Storage.Load();
var tasks = new List<Task>();
foreach (int[] temp in Generator()) {
var arr = temp;
var task = new Task(() => {
//... use arr as needed
});
task.Start();
tasks.Add(task);
if (tasks.Count > 4) {
Task.WaitAll(tasks.ToArray());
Storage.UpdateStart(temp);
tasks = new List<Task>();
}
}
}
Prior to making the generator restartable, I had a simple Parallel.ForEach loop over it, and it was a bit faster. I think I am losing some CPU time to the WaitAll operation. How can I get rid of this bottleneck while still keeping track of which tasks I don't have to run again when I restart?
Other bits for those interested (shortened for brevity):
class Program {
static bool Done = false;
static int[] GeneratorStart = null;
static IEnumerable<int[]> Generator() {
var s = new Stack<int>();
//... omitted code to initialize stack to GeneratorStart for brevity
yield return s.ToArray();
while (!Done) {
Increment(s);
yield return s.Reverse().ToArray();
}
}
static int Base = 25600; //example number (none of this is important
static void Increment(Stack<int> stack) { //outside the fact
if (stack.Count == 0) { //that it is generating an array
stack.Push(1); //of a large base
return; //behaving like an integer
} //with each digit stored in an
int i = stack.Pop(); //array position)
i++;
if (i < Base) {
stack.Push(i);
return;
}
Increment(stack);
stack.Push(0);
}
}
I've come up with this:
var tasks = new Queue<Pair<int[],Task>>();
foreach (var temp in Generator()) {
var arr = temp;
tasks.Enqueue(new Pair<int[], Task>(arr, Task.Run(() => {
//... use arr as needed
})));
var tArray = tasks.Select(p => p.B).Where(t => !t.IsCompleted).ToArray();
if (tArray.Length > 7) {
Task.WaitAny(tArray);
var first = tasks.Peek();
while (first != null && first.B.IsCompleted) {
Storage.UpdateStart(first.A);
tasks.Dequeue();
first = tasks.Count == 0 ? null : tasks.Peek();
}
}
}
...
class Pair<TA,TB> {
public TA A { get; set; }
public TB B { get; set; }
public Pair(TA a, TB b) { A = a; B = b; }
}