.net - api call is blocking other api calls - c#

I have an issue with one endpoint blocking calls to other endpoints in my app. When we call this endpoint, it basically blocks all other API calls from executing; they have to wait until this one is finished.
public async Task<ActionResult> GrantAccesstoUsers()
{
// other operations
var grantResult = await this._workSpaceProvider.GrantUserAccessAsync(this.CurrentUser.Id).ConfigureAwait(false);
return this.Ok(grantResult);
}
The GrantUserAccessAsync method builds a set of tasks that are meant to run in parallel.
public async Task<List<WorkspaceDetail>> GrantUserAccessAsync(string currentUser)
{
var responselist = new List<WorkspaceDetail>();
try
{
// these values are fetched up front so they can be reused by the tasks created below
// they are inexpensive calls
var properlyNamedWorkSpaces = await this._helper.GetProperlyNamedWorkspacesAsync(true).ConfigureAwait(false);
var dbGroups = await this._reportCatalogProvider.GetWorkspaceFromCatalog().ConfigureAwait(false);
var catalogInfo = await this._clientServiceHelper.GetDatabaseConfigurationAsync("our-service").ConfigureAwait(false);
if (properlyNamedWorkSpaces != null && properlyNamedWorkSpaces.Count > 0)
{
// these methods return tasks for parallel processing
var grantUserContributorAccessTaskList = await this.GrantUserContributorAccessTaskList(properlyNamedWorkSpaces, currentUser, dbGroups, catalogInfo).ConfigureAwait(false);
var grantUserAdminAccessTaskList = await this.GrantUserAdminAccessTaskList(properlyNamedWorkSpaces, currentUser, dbGroups, catalogInfo).ConfigureAwait(false);
var removeInvalidUserAndSPNTaskList = await this.RemoveAccessRightsToWorkspaceTaskList(properlyNamedWorkSpaces, dbGroups, currentUser, catalogInfo).ConfigureAwait(false);
var tasklist = new List<Task<WorkspaceDetail>>();
tasklist.AddRange(grantUserContributorAccessTaskList);
tasklist.AddRange(grantUserAdminAccessTaskList);
tasklist.AddRange(removeInvalidUserAndSPNTaskList);
// Start running Parallel Task
Parallel.ForEach(tasklist, task =>
{
Task.Delay(this._config.CurrentValue.PacingDelay);
task.Start();
});
// Get all client workspace processing results
var clientWorkspaceProcessingResult = await Task.WhenAll(tasklist).ConfigureAwait(false);
// Populate result
responselist.AddRange(clientWorkspaceProcessingResult.ToList());
}
}
catch (Exception)
{
throw;
}
return responselist;
}
These methods are basically identical in structure and they look like this:
private async Task<List<Task<WorkspaceDetail>>> GrantUserContributorAccessTaskList(List<Group> workspaces, string currentUser, List<WorkspaceManagement> dbGroups, DatabaseConfig catalogInfo)
{
var tasklist = new List<Task<WorkspaceDetail>>();
foreach (var workspace in workspaces)
{
tasklist.Add(new Task<WorkspaceDetail>(() =>
this.GrantContributorAccessToUsers(workspace, currentUser, dbGroups, catalogInfo).Result));
// I added a delay here because we hit an issue in production before and this seemed to solve it. It is set to 4 ms.
Task.Delay(this._config.CurrentValue.DelayInMiliseconds);
}
return tasklist;
}
The methods called inside those task lists look like this:
private async Task<WorkspaceDetail> GrantContributorAccessToUsers(Group workspace, string currentUser, List<Data.ReportCatalogDB.WorkspaceManagement> dbGroups, DatabaseConfig catalogInfo)
{
// This prevents other threads or tasks from starting and keeps us from exceeding the allowed number of threads
await this._batchProcessor.WaitAsync().ConfigureAwait(false);
var result = new WorkspaceDetail();
try
{
var contributorAccessresult = await this.helper.GrantContributorAccessToUsersAsync(workspace, this._powerBIConfig.CurrentValue.SPNUsers).ConfigureAwait(false);
if (contributorAccessresult != null
&& contributorAccessresult.Count > 0)
{
// do something
}
else
{
// do something
}
// this reuses the result of the call executed in the helper above; it's an expensive call to an external endpoint, so we reuse what the initial call returned instead of calling it again for this process
var syncWorkspaceAccessToDb = await this.SyncWorkspaceAccessAsync(currentUser, workspace.Id, contributorAccessresult, dbGroups, catalogInfo).ConfigureAwait(false);
foreach (var dbResponse in syncWorkspaceAccessToDb) {
result.ResponseMessage += dbResponse.ResponseMessage;
}
}
catch (Exception ex)
{
this._loghelper.LogEvent(this._logger, logEvent, OperationType.GrantContributorAccessToWorkspaceManager, LogEventStatus.FAIL);
}
finally
{
this._batchProcessor.Release();
}
return result;
}
The last method writes the records to a database table:
private async Task<List<WorkspaceDetail>> SyncWorkspaceAccessAsync(string currentUser,
Guid workspaceId,
List<GroupUser> groupUsers,
List<WorkspaceManagement> dbGroups,
DatabaseConfig catalogInfo) {
var result = new List<WorkspaceDetail>();
var tasklist = new List<Task<WorkspaceDetail>>();
// get active workspace details from the db
var workspace = dbGroups.Where(x => x.PowerBIGroupId == workspaceId).FirstOrDefault();
try
{
// to auto-dispose the provider; we create one per call because
// reusing a single instance causes an error when another task starts running
using (var contextProvider = this._contextFactory.GetReportCatalogProvider(
catalogInfo.Server,
catalogInfo.Database,
catalogInfo.Username,
catalogInfo.Password,
this._dbPolicy))
{
if (workspace != null)
{
// get current group users in the db from the workspace object
var currentDbGroupUsers = workspace.WorkspaceAccess.Where(w => w.Id == workspace.Id
&& w.IsDeleted == false).ToList();
#region identify to process
#region users to add
// identify users to add
var usersToAdd = groupUsers.Where(g => !currentDbGroupUsers.Any(w => w.Id == workspace.Id ))
.Select(g => new WorkspaceAccess
{
// class properties
}).ToList();
#endregion
var addTasks = await this.AddWorkspaceAccessToDbTask(contextProvider, usersToAdd, workspace.PowerBIGroupId, workspace.WorkspaceName).ConfigureAwait(false);
tasklist.AddRange(addTasks);
// this is a potential fix I tried, hoping that adding another parallel loop would solve the problem
Parallel.ForEach(tasklist, new ParallelOptions { MaxDegreeOfParallelism = this._config.CurrentValue.MaxDegreeOfParallelism }, task =>
{
Task.Delay(this._config.CurrentValue.PacingDelay);
task.Start();
});
var processResult = await Task.WhenAll(tasklist).ConfigureAwait(false);
// Populate result
result.AddRange(processResult.ToList());
}
}
}
catch (Exception ex)
{
// handle error
}
return result;
}
I have already tried some potential solutions. For example, these methods were originally written with Task.FromResult instead of async/await, and I changed that based on this thread:
Using Task.FromResult v/s await in C#
I also thought it was similar to an issue we faced before, where creating the multiple db context connections needed by the parallel tasks caused failures; we worked around that by adding a small delay between tasks, but it didn't solve this problem:
Task.Delay(this._config.CurrentValue.DelayInMiliseconds);
Any help would be much appreciated.

I assume your this._batchProcessor is an instance of SemaphoreSlim. If your other endpoints somehow call
await this._batchProcessor.WaitAsync()
that means they can't go any further until the semaphore is released.
Another thing I'd like to mention: please avoid mixing Parallel.ForEach with async/await. Parallel.ForEach is not designed for asynchronous work; here is a good answer on why you should avoid using them together: Nesting await in Parallel.ForEach
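To address the blocking itself, the usual pattern is to let the async methods start as soon as they are called ("hot" tasks) and then await them all, instead of wrapping them in cold Task objects and starting them from Parallel.ForEach (the unawaited Task.Delay calls also do nothing, since their returned tasks are discarded). A minimal sketch of what GrantUserAccessAsync could look like, assuming GrantContributorAccessToUsers keeps the signature from the question (the admin and removal work would be added the same way):
public async Task<List<WorkspaceDetail>> GrantUserAccessAsync(string currentUser)
{
    var properlyNamedWorkSpaces = await this._helper.GetProperlyNamedWorkspacesAsync(true).ConfigureAwait(false);
    var dbGroups = await this._reportCatalogProvider.GetWorkspaceFromCatalog().ConfigureAwait(false);
    var catalogInfo = await this._clientServiceHelper.GetDatabaseConfigurationAsync("our-service").ConfigureAwait(false);

    var tasklist = new List<Task<WorkspaceDetail>>();
    foreach (var workspace in properlyNamedWorkSpaces ?? new List<Group>())
    {
        // Calling the async method starts it immediately; the SemaphoreSlim inside
        // GrantContributorAccessToUsers already limits how many run concurrently.
        tasklist.Add(this.GrantContributorAccessToUsers(workspace, currentUser, dbGroups, catalogInfo));
        // ...add the admin and removal tasks for this workspace the same way...
    }

    // Await asynchronously; no thread is blocked while the work is in flight.
    var results = await Task.WhenAll(tasklist).ConfigureAwait(false);
    return results.ToList();
}
With hot tasks there is nothing to Start, so both Parallel.ForEach loops can be removed entirely, which should stop this endpoint from tying up thread-pool threads that other requests need.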

Related

SemaphoreSlim vs Parallel Processing for API post calls in .NET

I have a cron job which calls a database table and gets about half a million records returned. I need to loop through all of that data and send an API POST for each record to a third-party API. In general this works fine, but the processing time is forever (10 hours). I need a way to speed it up. I've been trying to use a list of Task with SemaphoreSlim, but I'm running into issues (it doesn't like that my API call returns a Task). I'm wondering if anyone has a solution to this that won't destroy the VM's memory?
Current code looks something like:
foreach(var data in dataList)
{
try
{
var response = await _apiService.PostData(data);
_logger.Trace(response.Message);
}
catch { /* ... */ }
}
But I'm trying to do this and getting the syntax wrong:
var tasks = new List<Task<DataObj>>();
var throttler = new SemaphoreSlim(10);
foreach(var data in dataList)
{
await throttler.WaitAsync();
tasks.Add(Task.Run(async () => {
try
{
var response = await _apiService.PostData(data);
_logger.Trace(response.Message);
}
finally
{
throttler.Release();
}
}));
}
Your list is of type Task<DataObj>, but your async lambda doesn't return anything, so its return type is Task. To fix the syntax, just return the value:
var response = await _apiService.PostData(data);
_logger.Trace(response.Message);
return response;
As others have noted in the comments, I also recommend not using Task.Run here. A local async method would work fine:
var tasks = new List<Task<DataObj>>();
var throttler = new SemaphoreSlim(10);
foreach(var data in dataList)
{
tasks.Add(ThrottledPostData(data));
}
var results = await Task.WhenAll(tasks);
async Task<DataObj> ThrottledPostData(Data data)
{
await throttler.WaitAsync();
try
{
var response = await _apiService.PostData(data);
_logger.Trace(response.Message);
return response;
}
finally
{
throttler.Release();
}
}
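As a side note, if the project targets .NET 6 or later, Parallel.ForEachAsync gives the same kind of throttled asynchronous fan-out without managing the SemaphoreSlim by hand. A rough sketch, assuming the same _apiService and dataList (responses are only logged here, not collected):
var options = new ParallelOptions { MaxDegreeOfParallelism = 10 };
await Parallel.ForEachAsync(dataList, options, async (data, cancellationToken) =>
{
    // Unlike the classic Parallel.ForEach, the async body is actually awaited.
    var response = await _apiService.PostData(data);
    _logger.Trace(response.Message);
});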

Running same task multiple times in parallel with EF Core

I have a Task that generates a PDF file for an order (it takes about 10 seconds to create one PDF):
public async Task GeneratePDF(Guid Id) {
var order = await
_context
.Orders
.Include(order => order.Customer)
... //a lot more Include and ThenInclude statements
.FirstOrDefaultAsync(order => order.Id == Id);
var document = ... //PDF generated here, takes about 10 seconds
order.PDF = document ;
_context.SaveChangesAsync();
}
I tried the following:
public async Task GenerateAllPDFs() {
var orderIds = await _context.Orders.Select(order=> order.Id).ToListAsync();
foreach (var id in orderIds)
{
_ = GeneratePDF(id).ContinueWith(t => Console.WriteLine(t.Exception), TaskContinuationOptions.OnlyOnFaulted);
}
}
this gives me the error:
System.ObjectDisposedException: Cannot access a disposed object. A common cause of this error is disposing a context that was resolved from dependency injection and then later trying to use the same context instance elsewhere in your application. This may occur if you are calling Dispose() on the context, or wrapping the context in a using statement. If you are using dependency injection, you should let the dependency injection container take care of disposing context instances.
If I change the task as follows...
public async Task GenerateAllPDFs() {
var orderIds = await _context.Orders.Select(order=> order.Id).ToListAsync();
foreach (var id in orderIds)
{
await GeneratePDF(id);
}
}
...it runs the task for each order in series, taking ages to complete (I have a few thousands orders, taking about 10 seconds per order)...
How can I run this task in parallel for all orders in the context, so that the time it takes to complete is much less than sequential processing?
You can map your orders to tasks and await them all, like:
public async Task GeneratePDF(Order order) {
var document = ... //PDF generated here, takes about 10 seconds
order.PDF = document ;
}
public async Task GenerateAllPDFs() {
var orders = await _context.Orders.ToListAsync();
var tasks = orders.Select(order => GeneratePDF(order).ContinueWith(t => Console.WriteLine(t.Exception), TaskContinuationOptions.OnlyOnFaulted));
await Task.WhenAll(tasks);
await _context.SaveChangesAsync();
}
Here is my suggestion from the comment as an answer. I would split it into 3 parts:
1) get all orders,
2) do a Parallel.ForEach to generate all documents in parallel and assign each document to the proper order, and in the end
3) do a single _context.SaveChangesAsync(); to make a bulk update of the data on the server.
public async Task GenerateAllPDFs()
{
var allOrders = await _context.Orders.ToListAsync();
System.Threading.Tasks.Parallel.ForEach(allOrders, order =>
{
var document = ... //PDF generated here, takes about 10 seconds
order.PDF = document ;
});
await _context.SaveChangesAsync();
}
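Note that the original GeneratePDF also queries the injected _context, which is not thread-safe and is disposed together with the request scope; that is where the ObjectDisposedException comes from. If each parallel task really needs to run its own query, one option is to give each task its own context. A sketch, assuming EF Core 5+ with a registered IDbContextFactory<MyDbContext> (MyDbContext, the throttle size and GenerateDocument are placeholders, and the Include chain from the question is omitted):
private readonly IDbContextFactory<MyDbContext> _contextFactory; // injected

public async Task GenerateAllPDFs()
{
    List<Guid> orderIds;
    using (var context = _contextFactory.CreateDbContext())
    {
        orderIds = await context.Orders.Select(o => o.Id).ToListAsync();
    }

    var throttler = new SemaphoreSlim(4); // keeps the number of open contexts bounded
    var tasks = orderIds.Select(async id =>
    {
        await throttler.WaitAsync();
        try
        {
            using var context = _contextFactory.CreateDbContext(); // one context per task
            var order = await context.Orders.FirstOrDefaultAsync(o => o.Id == id);
            order.PDF = GenerateDocument(order); // the ~10 second PDF step (placeholder)
            await context.SaveChangesAsync();
        }
        finally
        {
            throttler.Release();
        }
    });
    await Task.WhenAll(tasks);
}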
You need to implement parallel programming.
https://learn.microsoft.com/en-us/dotnet/standard/parallel-programming/task-based-asynchronous-programming
public class Example
{
public static void Main()
{
Task[] taskArray = new Task[10];
for (int i = 0; i < taskArray.Length; i++) {
taskArray[i] = Task.Factory.StartNew( (Object obj ) => {
CustomData data = obj as CustomData;
if (data == null)
return;
data.ThreadNum = Thread.CurrentThread.ManagedThreadId;
},
new CustomData() {Name = i, CreationTime = DateTime.Now.Ticks} );
}
Task.WaitAll(taskArray);
foreach (var task in taskArray) {
var data = task.AsyncState as CustomData;
if (data != null)
Console.WriteLine("Task #{0} created at {1}, ran on thread #{2}.",
data.Name, data.CreationTime, data.ThreadNum);
}
}
}
// CustomData as used in the linked sample:
public class CustomData
{
public long CreationTime;
public int Name;
public int ThreadNum;
}
I think I will have to "duplicate" the GeneratePDF method to facilitate the batch processing by implementing the other answers, since I also need this method in non-batch mode...

Why doesn't my function await the end of ForEachAsync?

I'm trying to fill my model with data that I get from an asynchronous operation to my database. The problem is that the function returns the View (without the completed model), despite my await call.
I have tried adding a timer (I know that is not the solution) to be sure the problem comes from the asynchronous code, and I have also tried commenting out parts of the code inside my ForEachAsync, but it doesn't seem to help.
I get a list of projects, fill it with some additional information, assign the object to my model and then return the View:
public async Task<IActionResult> newProjetList(int GestionaireId, int VilleId)
{
ProjetListModel model = new ProjetListModel();
ProjetService projetService = new ProjetService(connectionString);
UserServices userServices = new UserServices(connectionString);
AvancementService avancementService = new AvancementService(connectionString);
VilleService villeService = new VilleService(connectionString);
List<Projet> projets = await projetService.NewProjetLine(GestionaireId, VilleId);
await projets.ToAsyncEnumerable().ForEachAsync(async p =>
{
int villeId = await villeService.getVilleIdByProjetId(p.ProjetId);
Ville ville = await villeService.GetById(villeId);
p.Ville = ville.VilleLabel;
p.GestionnaireProjet = await userServices.GetNameById(p.GestionnaireProjetId ?? 0);
await p.SousProjet.ToAsyncEnumerable().ForEachAsync(async sp =>
{
sp.Avancement = await avancementService.GetLabelById(sp.AvancementId);
});
});
model.projets = projets;
//System.Threading.Thread.Sleep(5000);
return View("ProjetList", model);
}
I expected an output with the missing information (here the 'Ville', 'GestionnaireProjet' and 'Avancement' fields).
ForEachAsync only takes an Action<...>, not a Func<..., Task>, so the async lambda your code is passing to ForEachAsync is becoming an async void method. One of the primary reasons async void should be avoided is that it's not easy to determine when the method completes - and in fact, in this case, there is nothing ensuring that it will complete before sending the response.
I recommend doing what Marc suggested and just using foreach:
List<Projet> projets = await projetService.NewProjetLine(GestionaireId, VilleId);
foreach (var p in projets)
{
int villeId = await villeService.getVilleIdByProjetId(p.ProjetId);
Ville ville = await villeService.GetById(villeId);
p.Ville = ville.VilleLabel;
p.GestionnaireProjet = await userServices.GetNameById(p.GestionnaireProjetId ?? 0);
foreach (var sp in p.SousProjet)
{
sp.Avancement = await avancementService.GetLabelById(sp.AvancementId);
}
}
model.projets = projets;
Or, if you want to use asynchronous concurrency, you can make use of Task.WhenAll:
List<Projet> projets = await projetService.NewProjetLine(GestionaireId, VilleId);
await Task.WhenAll(projets.Select(async p =>
{
int villeId = await villeService.getVilleIdByProjetId(p.ProjetId);
Ville ville = await villeService.GetById(villeId);
p.Ville = ville.VilleLabel;
p.GestionnaireProjet = await userServices.GetNameById(p.GestionnaireProjetId ?? 0);
await Task.WhenAll(p.SousProjet.Select(async sp =>
{
sp.Avancement = await avancementService.GetLabelById(sp.AvancementId);
}));
}));
model.projets = projets;
Use ForEachAwaitAsync instead of ForEachAsync
Explanation: ForEachAsync can't wait, since it simply executes your loop body for each element (it accepts an Action). In fact, you might have received a compiler warning for using an async lambda there, because you end up assigning a Task to something that expects void (so it goes unused).
ForEachAwaitAsync will wait, because it accepts a Func<Task> and awaits it internally.
For curious minds, you can see the source code here: https://github.com/dotnet/reactive/blob/main/Ix.NET/Source/System.Linq.Async/System/Linq/Operators/ForEach.cs mentioned as ForEachAwaitAsyncCore and ForEachAsync
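Applied to the controller action above, that would look roughly like this (a sketch assuming the System.Linq.Async package, which provides these operators):
// ForEachAwaitAsync awaits each body before moving on, so the model is
// fully populated before the View is returned.
await projets.ToAsyncEnumerable().ForEachAwaitAsync(async p =>
{
    int villeId = await villeService.getVilleIdByProjetId(p.ProjetId);
    Ville ville = await villeService.GetById(villeId);
    p.Ville = ville.VilleLabel;
    p.GestionnaireProjet = await userServices.GetNameById(p.GestionnaireProjetId ?? 0);
    await p.SousProjet.ToAsyncEnumerable().ForEachAwaitAsync(async sp =>
    {
        sp.Avancement = await avancementService.GetLabelById(sp.AvancementId);
    });
});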

Avoiding resource leaks with `FirstAsync` observer tasks that never happen

I have some software with an event-based networking protocol for control, which uses an IObservable<Event> for handling inbound messages.
In many cases a sent message will expect a specific response (or sequence, such as to report progress). In order not to miss the response, a task is set up in advance with FirstAsync and ToTask; however, these appear to leak if the task never completes.
It is also not possible to simply put evtTask in a using block, as trying to dispose the incomplete task is not allowed.
var jobUuid = Guid.NewGuid();
var evtTask = Events.FirstAsync((x) => x.Action == Action.JobComplete && x.JobUuid == jobUuid).ToTask();
// e.g. if this throws without ever sending the message
await SendMessage($"job {jobUuid} download {url}");
var evt = await evtTask;
if (evt.Success)
{
...
}
Does the library provide a simple means for this use-case that will unsubscribe on leaving the scope?
var jobUuid = Guid.NewGuid();
using(var evtTask = Events.FirstAsync((x) => x.Action == Action.JobComplete && x.JobUuid == jobUuid)
.ToDisposableTask())) // Some method like this
{
// e.g. if this throws without ever sending the message
await SendMessage($"job {jobUuid} download {url}");
var evt = await evtTask;
if (evt.Success)
{
...
}
} // Get rid of the FirstAsync task if leave here before it completes for any reason
Disposing the Task will not help, since it does nothing useful (in most situations, including this one). What will help, though, is cancelling the task: cancellation disposes the underlying subscription created by ToTask and so resolves this "leak".
So it can go like this:
Task<Event> evtTask;
using (var cts = new CancellationTokenSource()) {
evtTask = Events.FirstAsync((x) => x.Action == Action.JobComplete && x.JobUuid == jobUuid)
.ToTask(cts.Token);
// e.g. if this throws without ever sending the message
try {
await SendMessage($"job {jobUuid} download {url}");
}
catch {
cts.Cancel(); // disposes subscription
throw;
}
}
var evt = await evtTask;
if (evt.Success)
{
...
}
Of course you can wrap that in some more convenient form (like extension method). For example:
public static class ObservableExtensions {
public static CancellableTaskWrapper<T> ToCancellableTask<T>(this IObservable<T> source) {
return new CancellableTaskWrapper<T>(source);
}
public class CancellableTaskWrapper<T> : IDisposable
{
private readonly CancellationTokenSource _cts;
public CancellableTaskWrapper(IObservable<T> source)
{
_cts = new CancellationTokenSource();
Task = source.ToTask(_cts.Token);
}
public Task<T> Task { get; }
public void Dispose()
{
_cts.Cancel();
_cts.Dispose();
}
}
}
Then it becomes close to what you want:
var jobUuid = Guid.NewGuid();
using (var evtTask = Events.FirstAsync((x) => x.Action == Action.JobComplete && x.JobUuid == jobUuid).ToCancellableTask()) {
await SendMessage($"job {jobUuid} download {url}");
var evt = await evtTask.Task;
if (evt.Success) {
...
}
}
You can either use TPL Timeout (as referenced by #Fabjan), or the Rx/System.Reactive version of Timeout.
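For example, with Rx's Timeout operator the awaited task faults with a TimeoutException if no matching event arrives in time, which also tears down the subscription. A sketch, with the 30-second limit as an arbitrary placeholder:
var evtTask = Events
    .FirstAsync(x => x.Action == Action.JobComplete && x.JobUuid == jobUuid)
    .Timeout(TimeSpan.FromSeconds(30)) // the subscription is disposed when the timeout fires
    .ToTask();

await SendMessage($"job {jobUuid} download {url}");
var evt = await evtTask; // throws TimeoutException instead of waiting forever
If SendMessage throws, the abandoned task still completes (faulted) once the timeout elapses, so the subscription is released rather than kept alive indefinitely.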
using sounds nice, but doesn't make sense here. using is equivalent to calling .Dispose on something at the end of the using block. The problem, I'm assuming, is that your code never gets past await evtTask. Wrapping all of that in a hypothetical using wouldn't change anything: your code would still be waiting forever.
At a higher level, your code is more imperative than reactive; you may want to refactor it into something like this:
var subscription = Events
.Where(x => x.Action == Action.JobComplete)
.Subscribe(x =>
{
if(x.Success)
{
//...
}
else
{
//...
}
});

C# ForEach Loop With ASync Tasks & Dependent Post ASync Tasks

I'm having trouble trying to architect the most efficient way to iterate several async tasks launched from a request object and then perform some other async tasks that depend on both the request object and the result of the first async task. I'm running a C# Lambda function in AWS. I've tried a model like this (error handling and such has been omitted for brevity):
public async Task MyAsyncWrapper()
{
List<Task> Tasks = new List<Task>();
foreach (var Request in Requests)
{
var Continuation = this.ExecuteAsync(Request).ContinueWith(async x => {
KeyValuePair<bool, string> Result = x.Result;
if (Result.Key == true)
{
await this.DoSomethingElseAsync(Request.Id, Request.Name, Result.Value);
Console.WriteLine("COMPLETED");
}
});
Tasks.Add(Continuation);
}
Task.WaitAll(Tasks.ToArray());
}
This approach results in the DoSomethingElseAsync() method not really being awaited, and in a lot of my Lambda function calls I never get the "COMPLETED" output. I've also approached it this way:
public async Task MyAsyncWrapper()
{
foreach (var Request in Requests)
{
KeyValuePair<bool, string> Result = await this.ExecuteAsync(Request);
if (Result.Key == true)
{
await this.DoSomethingElseAsync(Request.Id, Request.Name, Result.Value);
Console.WriteLine("COMPLETED");
}
}
}
This works, but I think it's wasteful, since I can only execute one iteration of the loop while waiting for the async calls to finish. I have also looked at Interleaved Tasks, but the issue is that I basically end up with two loops: one to populate the tasks, and another to iterate them after they've completed, where I no longer have access to the original Request object. So basically this:
List<Task<KeyValuePair<bool, string>>> Tasks = new List<Task<KeyValuePair<bool, string>>>();
foreach (var Request in Requests)
{
Tasks.Add(this.ExecuteAsync(Request));
}
foreach (Task<KeyValuePair<bool, string>> ResultTask in Tasks.Interleaved())
{
KeyValuePair<bool, string> Result = ResultTask.Result;
//Can't access the original request for this method's parameters
await this.DoSomethingElseAsync(???, ???, Result.Value);
}
Any ideas on better ways to implement this type of async chaining in a foreach loop? My ideal approach wouldn't be to return the request object back as part of the response from ExecuteAsync(), so I'd like to try and find other options if possible.
I may be misinterpreting, but why not move your "iteration" into its own function and then use Task.WhenAll to wait for all iterations in parallel?
public async Task MyAsyncWrapper()
{
var allTasks = Requests.Select(ProcessRequest);
await Task.WhenAll(allTasks);
}
private async Task ProcessRequest(Request request)
{
KeyValuePair<bool, string> Result = await this.ExecuteAsync(request);
if (Result.Key == true)
{
await this.DoSomethingElseAsync(request.Id, request.Name, Result.Value);
Console.WriteLine("COMPLETED");
}
}
Consider using TPL dataflow:
var a = new TransformBlock<Input, OutputA>(async (Input i) =>
{
// do something async.
return new OutputA();
});
var b = new TransformBlock<OutputA, OutputB>(async (OutputA i) =>
{
// do more async.
return new OutputB();
});
var c = new ActionBlock<OutputB>(async (OutputB i) =>
{
// do some final async.
});
a.LinkTo(b, new DataflowLinkOptions { PropagateCompletion = true });
b.LinkTo(c, new DataflowLinkOptions { PropagateCompletion = true });
// push all of the items into the dataflow.
a.Post(new Input());
a.Complete();
// wait for it all to complete.
await c.Completion;
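If the per-block concurrency also needs a cap, each block accepts an ExecutionDataflowBlockOptions; for example, the first block could be constructed like this (a sketch extending the blocks above, with the numbers as arbitrary placeholders):
var blockOptions = new ExecutionDataflowBlockOptions
{
    MaxDegreeOfParallelism = 4, // at most 4 items processed by this block at once
    BoundedCapacity = 100       // back-pressure: SendAsync waits (and Post returns false) when full
};
var a = new TransformBlock<Input, OutputA>(async (Input i) =>
{
    // do something async.
    return new OutputA();
}, blockOptions);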
