Hello everyone and thanks for helping me in advance. The following question might sound stupid and incorrect but I'm a beginner about it.
I have a method that gets some information from my database and sends it to an external database using a post call and a patch call in case the information has changed. I use EF Framework. In that db table there are at least 165k rows.
My question is the following: Is there a way to optimize and speed up the whole process? Maybe using multithreading or parallelism? I'm a beginner at this and I hope some of you can help me understand.
The method is the following:
/// <summary>
/// Pushes work-order tasks from the local repository to the external "order_tasks" endpoint,
/// one page at a time. Every row is POSTed; when the API answers that the (order, task_code)
/// pair already exists, the remote task is fetched and PATCHed only if it differs from the local data.
/// </summary>
/// <param name="PageSize">Rows fetched per page; zero or a negative value falls back to 100.</param>
/// <returns>
/// One entry per processed row: the deserialized POST or PATCH response, or an
/// "is already updated!" message when the remote task already matches.
/// </returns>
public async Task<List<dynamic>> SyncOrdersTaskAsync(int PageSize)
{
    if (PageSize <= 0) PageSize = 100;

    // Text the API returns when the (order, task_code) pair is already present.
    const string phrase = "The fields order, task_code must make a unique set";

    // Dynamic projection handed to the repository; kept in one place so the paging loop stays readable.
    const string projection =
        "new {WkID, CompanyID, JobNo, JobTaskNo ,WkNumber, WkYear," +
        "WkYard,WkCustomerID,CuName,WkDivisionID,DvName,BusinessUnit," +
        "BusinessUnitManagerID,BusinessUnitManager,WkWorkTypeID,WtName," +
        "WkActivityID,WkActivityDescription,NoteDescrLavoro,WkWOManagerID," +
        "ProjectManager,IDMaster,ProjectCoordinator,WkOpenDate," +
        "WkDataChiusa,Prov,CodiceSito,CodiceOffice,CodiceLavorazione," +
        "CodiceNodo,DescrizioneNodo,WkPrevisionalStartDate,WkRealStartDate," +
        "WkPrevisionalEndDate,WkRealEndDate,NumeroOrdine," +
        "WkPrevisionalLabourAmount,TotaleCosti,SumOvertimeHours," +
        "SumTravelHours,SumNormalHours,WkProgressPercentage,Stato,CUP,CIG," +
        "TotaleManodopera,TotalePrestazioni,TotaleNoli,TotaleMateriali," +
        "SumAuxiliaryHours,TipoCommessa,TotaleOrdine, WkPreventivoData," +
        "WkConsuntivoData,TotaleFatturato,AggregateTotaleFatturato," +
        "AggregateTotalePrestazioni,Contract,CustomerOrderNumber," +
        "XmeWBECode,LastUpdateDate,PreGestWkID,CommercialNotes,Mandant," +
        "GammaProjectName,WkInventoryDate,WkCloseFlag,WkNote," +
        "TotalRegisteredLabour,TotalRegisteredPerformances," +
        "TotalRegisteredLeasings,TotalRegisteredMaterials,FlagFinalBalance," +
        "FinalBalance,OrderDate,TotalOrderDivision,SearchDescription," +
        "TotaleBefToBeApproved,TotaleBefToBeApprovedLeasings," +
        "TotaleLabourToBeApproved,AggregateLevel, AggregateTotalLabour," +
        "AggregateTotalLeasings,AggregateTotalMaterials," +
        "AggregateTotalRegisteredLabour," +
        "AggregateTotalRegisteredPerformances," +
        "AggregateTotalRegisteredLeasings," +
        "AggregateTotalRegisteredMaterials," +
        "AggregateTotalCost,AggregateSumNormalHours," +
        "AggregateSumAuxiliaryHours,AggregateSumRainHours," +
        "AggregateSumTravelHours,AggregateSumOvertimeHours," +
        "AggregateWkPrevisionalLabourAmount,AggregateFinalBalance," +
        "AggregateTotalOrder,AggregateTotalOrderDivision," +
        "AggregateTotalBefToBeApproved," +
        "AggregateTotalBefToBeApprovedLeasings," +
        "AggregateTotalLabourToBeApproved,TotalProduction," +
        "AggregateTotalProduction,JobTaskDescription}";

    var sorting = new SortingCriteria { Properties = new string[] { "WkOpenDate ASC" } };
    List<dynamic> listTest = new List<dynamic>();
    String url = appSettings.Value.UrlV1 + "order_tasks/";

    using (var uow = this.Factory.BeginUnitOfWork())
    // A single HttpClient serves every page; creating one per page wastes sockets.
    using (var httpClient = new HttpClient())
    {
        var repo = uow.GetRepository<IWorkOrderRepository>();
        var count = await repo.CountAllAsync();
        // NOTE(review): this override discards the real row count and limits the sync to the
        // first 150 rows; it looks like a debugging leftover. Remove it to process the whole table.
        count = 150;

        for (int pageIndex = 0; pageIndex <= count / PageSize; pageIndex++)
        {
            var paging = new PagingCriteria { PageIndex = pageIndex, PageSize = PageSize };
            var rows = await repo.GetByCriteriaAsync(projection, paging, sorting);

            // The API key is still requested once per page, as before; the header is replaced
            // rather than appended because the client now outlives the page.
            httpClient.DefaultRequestHeaders.Remove("Authorization");
            httpClient.DefaultRequestHeaders.Add("Authorization", "Token " +
                await this.GetApiKey(true));

            // An empty page simply yields no iterations, so no separate count check is needed.
            foreach (var row in rows)
            {
                var testWork = (Model.WorkOrderCompleteInfo)Mapper
                    .MapWkOrdersCompleteInfo(row);
                var orderIdDiv = await this.GetOrderForSyncing(httpClient,
                    testWork.JobNo);

                var jsonTest = new JObject();
                jsonTest["task_code"] = testWork.JobTaskNo;
                jsonTest["description"] = testWork.JobTaskDescription;
                // Fixed: the original referenced an undefined identifier here.
                jsonTest["order"] = orderIdDiv.Id;
                // A null OrderDate stringifies to "", so the original's third branch could never run.
                jsonTest["order_date"] = testWork.OrderDate.HasValue
                    ? testWork.OrderDate.Value.ToString("yyyy-MM-dd")
                    : "1970-01-01";
                jsonTest["progress"] = testWork.WkProgressPercentage;
                string payload = jsonTest.ToString();

                var result = await httpClient.PostAsync(url,
                    new StringContent(payload, Encoding.UTF8, "application/json"));
                if (result.Content == null)
                {
                    continue;
                }

                var responseContent = await result.Content.ReadAsStringAsync();

                // Fixed: the original tested that every *character* of the phrase occurs somewhere
                // in the response, which matches almost any English error text.
                if (!responseContent.Contains(phrase))
                {
                    listTest.Add(JsonConvert.DeserializeObject<dynamic>(responseContent));
                    continue;
                }

                var taskCase = await GetTaskForSyncing(httpClient,
                    testWork.JobTaskNo, orderIdDiv.Id.ToString());
                if (taskCase.Equals(testWork.toSolOrderTask()))
                {
                    listTest.Add(taskCase.JobTaskNo_ + " is already updated!");
                    continue;
                }

                String urlPatch = url + taskCase.Id + "/";
                // A fresh content object per request: some HttpClient versions dispose the
                // content after sending, so the POST body must not be reused for the PATCH.
                var resultPatch = await httpClient.PatchAsync(urlPatch,
                    new StringContent(payload, Encoding.UTF8, "application/json"));
                var responsePatchContent = await resultPatch.Content.ReadAsStringAsync();
                listTest.Add(JsonConvert.DeserializeObject<dynamic>(responsePatchContent));
            }
        }
    }
    return listTest;
}
}
Maybe I need to apply parallelism in the for loop?
Again, really thanks to everyone in advance and I hope I was clear :)
The most handy tool that is currently available for parallelizing asynchronous work is the Parallel.ForEachAsync method. It was introduced in .NET 6. Your code is quite complex though, and deciding where to put this loop is not obvious.
Ideally you would like to call the Parallel.ForEachAsync only once, so that it parallelizes your work with a single configurable degree of parallelism from start to finish. Generally you don't want to put this method inside an outer for/foreach loop, because then the degree of parallelism will fluctuate during the whole operation. But since your code is complex, I would go the easy way and do just that. I would replace this code:
foreach (var row in rows)
{
//...
}
...with this:
// Upper bound on how many rows are processed at the same time; tune it by measuring.
ParallelOptions options = new() { MaxDegreeOfParallelism = 2 };
// The second lambda parameter is the CancellationToken supplied by Parallel.ForEachAsync; it is discarded here.
await Parallel.ForEachAsync(rows, options, async (row, _) =>
{
//...
});
You have to make one more change. The List<T> is not thread-safe, so it will get corrupted if you call Add from multiple threads without synchronization. You can either add a lock (listTest) around each listTest.Add, or replace it with a concurrent collection. My suggestion is to do the latter:
// Thread-safe collection, so parallel iterations can add results without a lock.
ConcurrentQueue<dynamic> listTest = new();
//...
listTest.Enqueue(jsonContent);
//...
// Copy the queued results into a list for the caller.
return listTest.ToList();
After doing these changes, hopefully your code will still work correctly, and it will be running a bit faster. Then you'll have to experiment with the MaxDegreeOfParallelism setting, until you find the one that yields the optimal performance. Don't go crazy with large values like 100 or 1000. In most cases overparallelizing is harmful, and might yield worse performance than not parallelizing at all.
I'm trying to copy large set of files from one S3 to another S3, using asynchronous method. To achieve the same, the large set of files is broken into batches and each batch is handed over to a list of async method. The issue is, each async method is not processing more than 1 file in the batch, whereas each batch contains more than 1k files, not sure why async doesn't go back to process the remaining files.
Here is the code:
/// <summary>
/// Wraps every batch of keys in a delegate that calls RenameFilesAsync and then invokes the
/// delegates in order, logging a running counter after each invocation.
/// RenameFilesAsync returns void, so each invocation hands control back as soon as that method
/// reaches its first incomplete await; this method does not wait for the copies to finish.
/// </summary>
/// <param name="srcBucket">Bucket name forwarded to RenameFilesAsync.</param>
/// <param name="pdfFileList">Batches of object keys; one delegate is created per batch.</param>
/// <param name="s3client">S3 client forwarded to RenameFilesAsync.</param>
public void CreateAndExecuteSpawn(string srcBucket, List<List<string>> pdfFileList, IAmazonS3 s3client)
{
    LambdaLogger.Log("PDF Set count: " + pdfFileList.Count.ToString());

    // Phase 1: build one delegate per batch (nothing runs yet).
    var pending = new List<Action>();
    foreach (var batch in pdfFileList)
    {
        pending.Add(() => RenameFilesAsync(srcBucket, batch, s3client));
    }

    // Phase 2: invoke them in order; the counter logged is the zero-based position.
    for (int position = 0; position < pending.Count; position++)
    {
        pending[position]();
        LambdaLogger.Log("Mehtod invoked: " + position.ToString());
    }
}
/// <summary>
/// Copies every key in <paramref name="pdfFiles"/> (except keys containing "index.xml") to a
/// "Renamed" location inside the same bucket, one file after another.
/// The method is async void: the caller regains control at the first incomplete await and has
/// no task with which to wait for the remaining files.
/// </summary>
/// <param name="srcBucket">Bucket used as both source and destination.</param>
/// <param name="pdfFiles">Object keys to copy; each key is split on '{' and '/' to build the destination key.</param>
/// <param name="s3client">Client used to issue the copy requests.</param>
public async void RenameFilesAsync(string srcBucket, List<string> pdfFiles, IAmazonS3 s3client)
{
    LambdaLogger.Log("In RenameFileAsync method");

    // One request object is reused for every file; only the keys change per iteration.
    var request = new CopyObjectRequest();
    request.SourceBucket = srcBucket;
    request.DestinationBucket = srcBucket;

    try
    {
        foreach (string key in pdfFiles)
        {
            if (key.Contains("index.xml"))
            {
                continue;
            }

            string[] braceParts = key.Split('{');
            string[] pathParts = key.Split('/');
            request.SourceKey = key;
            // Destination: first two path segments, then "Renamed", then the text after the first '{'.
            request.DestinationKey = pathParts[0] + "/" + pathParts[1] + "/Renamed/" + braceParts[1];
            LambdaLogger.Log("About to rename File: " + key);
            //Here after copying one file, function doesn't return to foreach loop
            await s3client.CopyObjectAsync(request);
            //await s3client.CopyObjectAsync(copyRequest);
            LambdaLogger.Log("Rename done: ");
        }
    }
    catch (Exception failure)
    {
        // Log the error and the destination key that was being written when it happened.
        LambdaLogger.Log(failure.Message);
        LambdaLogger.Log(request.DestinationKey);
    }
}
// Handler as posted (abbreviated); presumably the AWS Lambda entry point, given the S3Event/ILambdaContext parameters.
// It is synchronous and CreateAndExecuteSpawn returns void, so nothing here waits for the copies that call starts.
public void FunctionHandler(S3Event evnt, ILambdaContext context)
{
//Some code here
CreateAndExecuteSpawn(bucket, pdfFileSet, s3client);
}
First you need to fix the batch so that it will process the batches one at a time. Avoid async void; use async Task instead:
/// <summary>
/// Runs RenameFilesAsync for each batch in turn, awaiting one batch before starting the next,
/// and logs a running counter after every completed batch.
/// </summary>
/// <param name="srcBucket">Bucket name forwarded to RenameFilesAsync.</param>
/// <param name="pdfFileList">Batches of object keys, processed in list order.</param>
/// <param name="s3client">S3 client forwarded to RenameFilesAsync.</param>
/// <returns>A task that completes when the last batch has been processed.</returns>
public async Task CreateAndExecuteSpawnAsync(string srcBucket, List<List<string>> pdfFileList, IAmazonS3 s3client)
{
    LambdaLogger.Log("PDF Set count: " + pdfFileList.Count.ToString());

    // Deferred work items: each one starts its batch only when invoked below.
    var batchRunners = new List<Func<Task>>();
    foreach (var batch in pdfFileList)
    {
        batchRunners.Add(() => RenameFilesAsync(srcBucket, batch, s3client));
    }

    for (int completed = 0; completed < batchRunners.Count; completed++)
    {
        await batchRunners[completed]();
        LambdaLogger.Log("Mehtod invoked: " + completed.ToString());
    }
}
public async Task RenameFilesAsync(string srcBucket, List<string> pdfFiles, IAmazonS3 s3client)
Then you can add asynchronous concurrency within each batch. The current code is just a foreach loop, so of course it only processes one at a time. You can change this to be asynchronously concurrent by Selecting the tasks to run and then doing a Task.WhenAll at the end:
// Body of RenameFilesAsync: starts one copy per file and waits for all of them together.
LambdaLogger.Log("In RenameFileAsync method");
try
{
    var tasks = pdfFiles
        .Where(file => !file.Contains("index.xml"))
        .Select(async file =>
        {
            string[] newFilename = file.Split('{');
            string[] destKey = file.Split('/');
            // Each copy gets its own request object. A single shared request would be
            // overwritten by the next lambda while earlier copies may still be reading it,
            // so files could be copied from or to the wrong key.
            var copyRequest = new CopyObjectRequest
            {
                SourceBucket = srcBucket,
                DestinationBucket = srcBucket,
                SourceKey = file,
                DestinationKey = destKey[0] + "/" + destKey[1] + "/Renamed/" + newFilename[1]
            };
            LambdaLogger.Log("About to rename File: " + file);
            CopyObjectResponse response = await s3client.CopyObjectAsync(copyRequest);
            LambdaLogger.Log("Rename done: ");
        })
        .ToList(); // materialize so every copy is started before waiting
    await Task.WhenAll(tasks);
}
catch (Exception ex)
{
    // await Task.WhenAll rethrows the first failure only; log it as the original method did.
    LambdaLogger.Log(ex.Message);
}
I need to call an API 5000 times. With the current logic the calls happen synchronously, one by one. Is there any way to call it asynchronously, without waiting for each API response? Code below.
// Fragment as posted: the enclosing method header and the opening "try {" are not shown.
// Endless cycle: send 5000 POSTs, pause for two minutes, repeat.
while (true)
{
using (HttpClient httpclient = new HttpClient())
{// ***Want to call the API Asynchronously***
for (int i = 0; i < 5000; i++)
{
// Current local time with its seconds component subtracted.
DateTime dt = DateTime.Now;
dt = dt.AddSeconds(-dt.Second);
Log[] data1 = new Log[]
{
// The initializer also assigns the new entry to the outer variable log, which is read after the loop.
log =new Log(){LogID=Guid.NewGuid(),LogLevel=new LogLevel(){ },Message="Maverick_Messgaes",Source="Maverick",StackTrace="Maverick Started",
Time=dt,Traceid="1"},
};
// .Result blocks until this POST has completed, so the requests go out strictly one at a time.
var response4 = httpclient.PostAsJsonAsync("http://localhost:8095/api/Log/PostAsync", data1).Result;
}
}
//logstack.Clear();
Console.WriteLine(log.Message + log.Time + " ");
// Blocks the current thread for 120 seconds before the next round.
Thread.Sleep(120000);
Console.WriteLine(" " + " 5000 messages Sent.. Iterating Again" + "" + DateTime.Now.ToString());
}
}
catch(Exception ex)
// NOTE(review): "throw ex;" resets the exception's stack trace; a bare "throw;" would preserve it.
{ throw ex; }
}
You could replace your for-loop with a Parallel.For loop to run the code within the loop in parallel.
This guide provides a good introduction with examples: https://learn.microsoft.com/en-us/dotnet/standard/parallel-programming/how-to-write-a-simple-parallel-for-loop
In its simplest form, it would look like:
Parallel.For(0, 5000, i =>
{
// code within existing for-loop goes here...
});
If you're concerned about the number of concurrent tasks, there are overloads that take a ParallelOptions parameter, within which you can specify the MaxDegreeOfParallelism.
Looks like you are not doing anything with the result so I am assuming you don't need to return it.
You need to make your method async and await the asynchronous HttpClient call, just like the code below.
/// <summary>
/// Starts ten requests without waiting for each one in turn, then waits for all of them
/// before asking for a key press.
/// </summary>
static void Main(string[] args)
{
    using (var client = new HttpClient())
    {
        var pending = new System.Threading.Tasks.Task[10];
        for (int i = 0; i < pending.Length; i++)
        {
            Console.WriteLine("Continuing iteration " + i);
            // The call returns at its first await, so the loop continues immediately;
            // keeping the task lets us know when the request has really finished.
            pending[i] = PostData(client);
        }

        // Blocking is acceptable in a console entry point (no synchronization context), and it
        // guarantees the client is not disposed while requests are still in flight.
        System.Threading.Tasks.Task.WaitAll(pending);
        Console.ReadKey();
    }
}

/// <summary>
/// Issues one GET request and reports the outcome on the console.
/// Returns a Task instead of void so callers can observe completion; with async void an
/// exception could not be caught by the caller and would terminate the process.
/// </summary>
/// <param name="client">Shared client used for the request.</param>
/// <returns>A task that completes when the request has finished or failed.</returns>
static async System.Threading.Tasks.Task PostData(HttpClient client)
{
    try
    {
        await client.GetStringAsync("https://www.google.com.ph");
        Console.WriteLine("Async call done");
    }
    catch (HttpRequestException ex)
    {
        // Network or HTTP failure.
        Console.WriteLine("Async call failed: " + ex.Message);
    }
    catch (System.Threading.Tasks.TaskCanceledException ex)
    {
        // HttpClient signals a timeout as a cancellation.
        Console.WriteLine("Async call timed out: " + ex.Message);
    }
}
I understand async JavaScript, but async .NET takes a different approach and I still haven't got my head around it properly.
I have a list of URLs that I would like to check. I would like to check them asynchronously and get the first one that returns a certain status code. In this case I am looking for status code 401 (Unauthorized) as this indicates that it is a login challenge, which is what I am expecting. So I can't just use Task.WaitAny because I need to run some code to see which one matches my status code first.
Can anyone give me an example of how to run a callback on an async task and then stop all the other tasks once you have found what you want?
I am using .NET 4 in this project and would prefer to stick with this if possible. I have the System.Net.Http.HttpClient nuget package installed.
UPDATE:
I have put together the following code, which I have finally got to produce the correct results, except I think it is waiting for each task - missing the whole point of being async. Not sure about use of new Task() or t.Wait() within the inner task but it seem the only way to catch the exception. (Exceptions happen on DNS fail and connection timeouts - I don't know a better way to handle those than catching and ignoring the exceptions.)
Any advice on improving this code to make it actually async?
// Requests every URL in the list and builds an HTML report saying, per URL, whether it
// answered 401 (Unauthorized), something else, or faulted.
// NOTE(review): the method is declared async but contains no await; it blocks in Task.WaitAll below.
public async Task<ActionResult> Test() {
//var patterns = GetPatterns();
var patterns = "http://stackoverflow.com/,https://www.google.com,http://www.beweb.co.nz,https://outlook.office365.com/Microsoft-Server-ActiveSync,http://rubishnotexist.com".Split(",").ToList();
var httpClient = new System.Net.Http.HttpClient();
string result = "";
CancellationTokenSource source = new CancellationTokenSource();
CancellationToken cancellationToken = source.Token;
var allTasks = new List<Task>();
foreach (var pattern in patterns) {
var url = pattern;
// One explicitly started task per URL; its body starts the request and then blocks on it.
Task task = new Task(() => {
string answer = "";
var st = DateTime.Now;
// ResponseHeadersRead: the request task completes once the headers are available, without reading the body.
var t = httpClient.GetAsync(pattern, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
// The task returned by ContinueWith is discarded, so nothing below waits for this callback to run.
// NOTE(review): result is appended here from several continuations without any synchronization.
t.ContinueWith(d => {
if (!source.IsCancellationRequested) {
if (t.IsFaulted) {
answer = "Fault - " + " " + url;
} else if (d.Result.StatusCode == System.Net.HttpStatusCode.Unauthorized) {
// found it - so cancel all others
answer = "YES - " + d.Result.StatusCode + " " + url;
//source.Cancel();
} else {
answer = "No - " + d.Result.StatusCode + " " + url;
}
}
result += answer + " ("+(DateTime.Now-st).TotalMilliseconds+"ms)<br>";
});
try {
// Blocks this task's thread until the request itself has completed; a faulted request throws here.
t.Wait();
} catch (Exception) {
// ignore eg DNS fail and connection timeouts
}
});
allTasks.Add(task);
task.Start();
}
// Blocks the calling thread until every outer task has finished (Task.WaitAll is synchronous).
Task.WaitAll(allTasks.ToArray());
return Content(result + "<br>DONE");
}
In the above I didn't have the cancellation part working. Here is a version including cancellation:
// Variant of the URL check that cancels the shared token as soon as one URL answers 401 (Unauthorized).
// NOTE(review): the method is declared async but contains no await; it blocks in Task.WaitAll below.
public async Task<ActionResult> Test2(string email) {
var patterns = GetPatterns(email);
// The result of GetPatterns is immediately replaced by this hard-coded list.
patterns = "http://stackoverflow.com/,https://www.google.com,http://www.beweb.co.nz,https://outlook.office365.com/Microsoft-Server-ActiveSync,http://rubishnotexist.com".Split(",").ToList();
var httpClient = new System.Net.Http.HttpClient();
string result = "";
CancellationTokenSource source = new CancellationTokenSource();
CancellationToken cancellationToken = source.Token;
var allTasks = new List<Task>();
foreach (var pattern in patterns) {
var url = pattern;
// One explicitly started task per URL; its body starts the request and then blocks on it.
Task task = new Task(() => {
string answer = "";
var st = DateTime.Now;
// ResponseHeadersRead: the request task completes once the headers are available, without reading the body.
var t = httpClient.GetAsync(pattern, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
// The task returned by ContinueWith is discarded, so nothing below waits for this callback to run.
// NOTE(review): result is appended here from several continuations without any synchronization.
t.ContinueWith(d => {
if (!source.IsCancellationRequested) {
if (t.IsFaulted) {
answer = "Fault - " + " " + url;
} else if (d.Result.StatusCode == System.Net.HttpStatusCode.Unauthorized) {
// found it - so cancel all others
answer = "YES - " + d.Result.StatusCode + " " + url;
// This branch writes its own report line; the shared append after the if/else runs as well,
// so the matching URL is reported twice.
result += answer + " (" + (DateTime.Now - st).TotalMilliseconds + "ms) <-- cancelled here <br>";
source.Cancel();
} else {
answer = "No - " + d.Result.StatusCode + " " + url;
}
} else {
answer = "cancelled - " + url;
}
result += answer + " (" + (DateTime.Now - st).TotalMilliseconds + "ms)<br>";
});
try {
// Blocks this task's thread until the request has completed, faulted or been cancelled.
t.Wait();
} catch (Exception) {
// ignore
}
});
allTasks.Add(task);
task.Start();
}
// Blocks the calling thread until every outer task has finished (Task.WaitAll is synchronous).
Task.WaitAll(allTasks.ToArray());
return Content(result + "<br>DONE");
}
Use Task.WhenAll() instead, then examine the results of the tasks.
To prevent other tasks continuing once any one throws an exception, you can create a single CancellationToken (by first creating a CancellationTokenSource, then using its .Token) that you pass to all the tasks, and on failure, you cancel the token; see How to cancel and raise an exception on Task.WhenAll if any exception is raised? for more details and sample code. All the tasks then observe the token, and optionally explicitly check it occasionally and exit if it's canceled. They should also pass it on to those methods that support it, so they, in turn, can cancel quickly when the token is canceled.
Re exceptions, this answer covers them pretty well. If you want no exception thrown into the calling code, you should handle the exception within each task you create instead, but then you will need to modify the above canceling mechanism accordingly. You could instead just catch the single exception that await Task.WhenAll() might throw and at that point observe all the exceptions thrown in the Task.Exception property of each task, or ignore them if that is the desired result.
Re canceling on success (from the comments) - I guess there are many ways to do it, but one could be:
// Starts one status check per URL, then inspects the checks in completion order until one
// reports 401 (Unauthorized) or none are left. On a match the shared token is cancelled so
// the remaining checks can stop early.
using (var cts = new CancellationTokenSource())
{
    var pending = new List<Task<HttpStatusCode>>();
    foreach (var url in patterns)
    {
        pending.Add(GetStatusCodeAsync(url, cts.Token));
    }

    while (!cts.IsCancellationRequested && pending.Any())
    {
        // Whichever check finishes next.
        Task<HttpStatusCode> finished = await Task.WhenAny(pending);

        if (await finished != HttpStatusCode.Unauthorized)
        {
            // Not the one we want: drop it and keep waiting on the rest.
            pending.Remove(finished);
            continue;
        }

        cts.Cancel();
        // Handle the "found" situation
        // ...
    }
}
and then put your HttpClient code in a separate method:
// Template for checking a single URL: returns the status code produced by the caller-supplied
// HttpClient code, or HttpStatusCode.Unused when anything in the try block throws.
// Because every exception is caught, awaiting the returned task does not throw.
private static async Task<HttpStatusCode> GetStatusCodeAsync(object url, CancellationToken token)
{
try
{
// Your HttpClient code
// ...
// Placeholder: replace with the actual awaited call(s).
await <things>;
// (pass token on to methods that support it)
// ...
return httpStatusCode;
}
// Catches everything, including the exception raised when the token is cancelled.
catch (Exception e)
{
// Don't rethrow if you handle everything here
return HttpStatusCode.Unused; // (or whatever)
}
}
I have a method like so...
/// <summary>
/// Queries all leads, following the continuation URL until no further page is reported,
/// and passes the accumulated records to Upsert together with Lead.target.
/// </summary>
/// <param name="client">Client used for the initial query and every continuation query.</param>
static async Task GetLeads(ForceClient client)
{
    Console.WriteLine("Get Leads");

    var collected = new List<Lead>();
    var firstPage = await client.QueryAsync<Lead>(Lead._select);
    Console.WriteLine("Queried " + firstPage.TotalSize + " leads.");
    collected.AddRange(firstPage.Records);
    Console.WriteLine("Added " + firstPage.Records.Count + " leads...");

    var cursor = firstPage.NextRecordsUrl;
    if (!string.IsNullOrEmpty(cursor))
    {
        Console.WriteLine("Found more records...");
    }

    // Keep fetching while the previous page pointed at another one.
    while (!string.IsNullOrEmpty(cursor))
    {
        var page = await client.QueryContinuationAsync<Lead>(cursor);
        Console.WriteLine("Queried an additional " + page.Records.Count + " leads.");
        collected.AddRange(page.Records);
        cursor = page.NextRecordsUrl;
    }

    Upsert(collected, Lead.target);
}
I have another method like so..
/// <summary>
/// Queries all supplier products, following the continuation URL until no further page is
/// reported, and passes the accumulated records to Upsert together with SupplierProduct.target.
/// </summary>
/// <param name="client">Client used for the initial query and every continuation query.</param>
static async Task GetSupplierProducts(ForceClient client)
{
    Console.WriteLine("Get SupplierProduct");

    var collected = new List<SupplierProduct>();
    var firstPage = await client.QueryAsync<SupplierProduct>(SupplierProduct._select);
    Console.WriteLine("Queried " + firstPage.TotalSize + " SupplierProduct.");
    collected.AddRange(firstPage.Records);
    Console.WriteLine("Added " + firstPage.Records.Count + " SupplierProduct...");

    var cursor = firstPage.NextRecordsUrl;
    if (!string.IsNullOrEmpty(cursor))
    {
        Console.WriteLine("Found more records...");
        // At least one more page exists; stop once a page reports no successor.
        do
        {
            var page = await client.QueryContinuationAsync<SupplierProduct>(cursor);
            Console.WriteLine("Queried an additional " + page.Records.Count + " SupplierProduct.");
            collected.AddRange(page.Records);
            cursor = page.NextRecordsUrl;
        }
        while (!string.IsNullOrEmpty(cursor));
    }

    Upsert(collected, SupplierProduct.target);
}
How can I make a method that abstracts this behavior generically?
The types Lead and SupplierProduct must be related in some way, in the sense that they either implement the same interface or inherit from the same base class, to make this work. The obvious shared element is _select, but it appears to be a static member, and static members cannot be included in an interface. Furthermore, the generation of the human-readable strings would have to be moved into the types.
If Base would be hypothetical base class, the signature of the generic function would have to be as follows.
static async Task Get<T>(ForceClient client) where T : Base
Assuming both Lead and SupplierProduct inherits the same Parent:
/// <summary>
/// Generic version of the per-type fetch methods: queries all records of type
/// <typeparamref name="T"/>, follows the continuation URL until no further page is reported,
/// and passes the accumulated records to Upsert.
/// </summary>
/// <typeparam name="T">
/// Record type. It must expose static members named "_select" (the query text) and "target",
/// as Lead and SupplierProduct do.
/// </typeparam>
/// <param name="client">Client used for the initial query and every continuation query.</param>
/// <exception cref="InvalidOperationException">The type has no static "_select" or "target" member.</exception>
static async Task GetMyInstance<T>(ForceClient client) where T : Parent
{
    // T is a type parameter, not an instance: its name comes from typeof(T), not T.GetType().
    string typeName = typeof(T).Name;

    // Static members cannot be accessed through a type parameter (T._select does not compile),
    // so they are looked up by name on the concrete type.
    string select = (string)ReadStaticMember(typeof(T), "_select");
    // NOTE(review): the declared type of "target" is not visible here; dynamic lets the call
    // bind to whatever Upsert overload matches at run time. Replace with the real type if known.
    dynamic target = ReadStaticMember(typeof(T), "target");

    Console.WriteLine("Get " + typeName);
    var accts = new List<T>();
    var results = await client.QueryAsync<T>(select);
    var totalSize = results.TotalSize;
    Console.WriteLine("Queried " + totalSize + " " + typeName + ".");
    accts.AddRange(results.Records);
    // Fixed: a space was missing between the count and the type name.
    Console.WriteLine("Added " + results.Records.Count + " " + typeName + "...");
    var nextRecordsUrl = results.NextRecordsUrl;
    if (!string.IsNullOrEmpty(nextRecordsUrl))
    {
        Console.WriteLine("Found more records...");
        while (true)
        {
            var continuationResults = await client.QueryContinuationAsync<T>(nextRecordsUrl);
            Console.WriteLine("Queried an additional " + continuationResults.Records.Count + " " + typeName + ".");
            accts.AddRange(continuationResults.Records);
            if (string.IsNullOrEmpty(continuationResults.NextRecordsUrl)) break;
            nextRecordsUrl = continuationResults.NextRecordsUrl;
        }
    }
    Upsert(accts, target);
}

// Returns the value of the static field or property called name on type.
private static object ReadStaticMember(Type type, string name)
{
    const System.Reflection.BindingFlags flags =
        System.Reflection.BindingFlags.Static |
        System.Reflection.BindingFlags.Public |
        System.Reflection.BindingFlags.NonPublic |
        System.Reflection.BindingFlags.FlattenHierarchy;

    var field = type.GetField(name, flags);
    if (field != null)
    {
        return field.GetValue(null);
    }

    var property = type.GetProperty(name, flags);
    if (property != null)
    {
        return property.GetValue(null, null);
    }

    throw new InvalidOperationException(
        "Type " + type.Name + " has no static member named '" + name + "'.");
}
Please note that each concrete type (Lead, SupplierProduct) must provide its own static _select and target members for this to work.
And you call it like this:
var foo = GetMyInstance<Lead>(forceClient);
or
var foo = GetMyInstance<SupplierProduct>(forceClient);
This likely won't suffice fully, but when I want to make multiple partial-similar methods accessible through a single function, I tend to pass target-strings and use a repeating switch-statement on them.
This way we can also run multiple actions after each other by supplying an array of targets.
Note; this code isn't debugged or anything, I just wanted to point you towards how switch-statements might be of some use to you. It wasn't possible for me to give a more comprehensive answer because I can't fully understand the intent of your code.
/// <summary>
/// Runs the fetch that corresponds to each entry of <paramref name="targets"/>, in order.
/// Recognised targets are "leads" and "suppliers"; any other value is skipped.
/// </summary>
/// <param name="client">Client used for the queries.</param>
/// <param name="targets">Names of the record kinds to fetch.</param>
static async Task GetRecordsFor(ForceClient client, string[] targets)
{
    foreach (string target in targets)
    {
        switch (target)
        {
            // String labels need double quotes; single quotes denote a single char in C#.
            case "leads":
            {
                // Braces give each case its own scope, so both cases can declare "accts".
                Console.WriteLine("Get Leads");
                var accts = new List<Lead>();
                // more specific code for fetching leads
                var results = await client.QueryAsync<Lead>(Lead._select);
                // Actions you want to perform on each of these.
                // They live inside the case because the list type differs per target.
                accts.AddRange(results.Records);
                break;
            }
            case "suppliers":
            {
                Console.WriteLine("Get SupplierProduct");
                var accts = new List<SupplierProduct>();
                // more specific code for fetching suppliers
                var results = await client.QueryAsync<SupplierProduct>(SupplierProduct._select);
                // Actions you want to perform on each of these.
                accts.AddRange(results.Records);
                break;
            }
        }
    }
}