How to optimize and speed up an asynchronous method with database calls - c#

Hello everyone, and thanks in advance for your help. The following question might sound naive, but I'm a beginner with this topic.
I have a method that reads some information from my database and sends it to an external service with a POST call, or a PATCH call in case the information has changed. I use Entity Framework. The db table involved has at least 165k rows.
My question is the following: is there a way to optimize and speed up the whole process? Maybe using multithreading or parallelism? I'm a beginner in this area and I hope some of you can help me understand.
The method is the following:
public async Task<List<dynamic>> SyncOrdersTaskAsync(int PageSize)
{
int PageIndex = 0;
if (PageSize <= 0) PageSize = 100;
const string phrase = "The fields order, task_code must make a unique set";
var sorting = new SortingCriteria {
Properties = new string[] { "WkOpenDate ASC" } };
List<dynamic> listTest = new List<dynamic>();
using (var uow = this.Factory.BeginUnitOfWork())
{
var repo = uow.GetRepository<IWorkOrderRepository>();
var count = await repo.CountAllAsync();
count = 150;
for (PageIndex = 0; PageIndex <= count / PageSize; PageIndex++)
{
var paging = new PagingCriteria
{
PageIndex = PageIndex,
PageSize = PageSize
};
var rows = await repo.GetByCriteriaAsync(
"new {WkID, CompanyID, JobNo, JobTaskNo ,WkNumber, WkYear," +
"WkYard,WkCustomerID,CuName,WkDivisionID,DvName,BusinessUnit," +
"BusinessUnitManagerID,BusinessUnitManager,WkWorkTypeID,WtName," +
"WkActivityID,WkActivityDescription,NoteDescrLavoro,WkWOManagerID," +
"ProjectManager,IDMaster,ProjectCoordinator,WkOpenDate," +
"WkDataChiusa,Prov,CodiceSito,CodiceOffice,CodiceLavorazione," +
"CodiceNodo,DescrizioneNodo,WkPrevisionalStartDate,WkRealStartDate," +
"WkPrevisionalEndDate,WkRealEndDate,NumeroOrdine," +
"WkPrevisionalLabourAmount,TotaleCosti,SumOvertimeHours," +
"SumTravelHours,SumNormalHours,WkProgressPercentage,Stato,CUP,CIG," +
"TotaleManodopera,TotalePrestazioni,TotaleNoli,TotaleMateriali," +
"SumAuxiliaryHours,TipoCommessa,TotaleOrdine, WkPreventivoData," +
"WkConsuntivoData,TotaleFatturato,AggregateTotaleFatturato," +
"AggregateTotalePrestazioni,Contract,CustomerOrderNumber," +
"XmeWBECode,LastUpdateDate,PreGestWkID,CommercialNotes,Mandant," +
"GammaProjectName,WkInventoryDate,WkCloseFlag,WkNote," +
"TotalRegisteredLabour,TotalRegisteredPerformances," +
"TotalRegisteredLeasings,TotalRegisteredMaterials,FlagFinalBalance," +
"FinalBalance,OrderDate,TotalOrderDivision,SearchDescription," +
"TotaleBefToBeApproved,TotaleBefToBeApprovedLeasings," +
"TotaleLabourToBeApproved,AggregateLevel, AggregateTotalLabour," +
"AggregateTotalLeasings,AggregateTotalMaterials," +
"AggregateTotalRegisteredLabour," +
"AggregateTotalRegisteredPerformances," +
"AggregateTotalRegisteredLeasings," +
"AggregateTotalRegisteredMaterials," +
"AggregateTotalCost,AggregateSumNormalHours," +
"AggregateSumAuxiliaryHours,AggregateSumRainHours," +
"AggregateSumTravelHours,AggregateSumOvertimeHours," +
"AggregateWkPrevisionalLabourAmount,AggregateFinalBalance," +
"AggregateTotalOrder,AggregateTotalOrderDivision," +
"AggregateTotalBefToBeApproved," +
"AggregateTotalBefToBeApprovedLeasings," +
"AggregateTotalLabourToBeApproved,TotalProduction," +
"AggregateTotalProduction,JobTaskDescription}", paging, sorting);
String url = appSettings.Value.UrlV1 + "order_tasks/";
using (var httpClient = new HttpClient())
{
httpClient.DefaultRequestHeaders.Add("Authorization", "Token " +
await this.GetApiKey(true));
if (rows.Count() > 0)
{
foreach (var row in rows)
{
var testWork = (Model.WorkOrderCompleteInfo)Mapper
.MapWkOrdersCompleteInfo(row);
var orderIdDiv = await this.GetOrderForSyncing(httpClient,
testWork.JobNo);
var jsonTest = new JObject();
jsonTest["task_code"] = testWork.JobTaskNo;
jsonTest["description"] = testWork.JobTaskDescription;
jsonTest["order"] = orderIdDivitel.Id;
jsonTest["order_date"] = testWork.OrderDate.HasValue
? testWork.OrderDate.Value.ToString("yyyy-MM-dd")
: string.IsNullOrEmpty(testWork.OrderDate.ToString())
? "1970-01-01"
: testWork.OrderDate.ToString().Substring(0, 10);
jsonTest["progress"] = testWork.WkProgressPercentage;
var content = new StringContent(jsonTest.ToString(),
Encoding.UTF8, "application/json");
var result = await httpClient.PostAsync(url, content);
if (result.Content != null)
{
var responseContent = await result.Content
.ReadAsStringAsync();
bool alreadyExists = responseContent.Contains(phrase);
if (alreadyExists)
{
var taskCase = await GetTaskForSyncing(httpClient,
testWork.JobTaskNo, orderIdDiv.Id.ToString());
var idCase = taskCase.Id;
String urlPatch = appSettings.Value.UrlV1 +
"order_tasks/" + idCase + "/";
bool isSame = taskCase.Equals(testWork
.toSolOrderTask());
if (!isSame)
{
var resultPatch = await httpClient.PatchAsync(
urlPatch, content);
if (resultPatch != null)
{
var responsePatchContent = await resultPatch
.Content.ReadAsStringAsync();
var jsonPatchContent = JsonConvert
.DeserializeObject<dynamic>(
responsePatchContent);
listTest.Add(jsonPatchContent);
}
}
else
{
listTest.Add(taskCase.JobTaskNo_ +
" is already updated!");
}
}
else
{
var jsonContent = JsonConvert
.DeserializeObject<dynamic>(responseContent);
listTest.Add(jsonContent);
}
}
}
}
}
}
return listTest;
}
}
Maybe I need to apply parallelism to the for loop?
Again, thanks to everyone in advance, and I hope I was clear :)

The handiest tool currently available for parallelizing asynchronous work is the Parallel.ForEachAsync method, introduced in .NET 6. Your code is quite complex though, and deciding where to put this loop is not obvious.
Ideally you would like to call Parallel.ForEachAsync only once, so that it parallelizes your work with a single configurable degree of parallelism from start to finish. Generally you don't want to put this method inside an outer for/foreach loop, because then the degree of parallelism will fluctuate during the operation. But since your code is complex, I would go the easy way and do just that. I would replace this code:
foreach (var row in rows)
{
//...
}
...with this:
ParallelOptions options = new() { MaxDegreeOfParallelism = 2 };
await Parallel.ForEachAsync(rows, options, async (row, _) =>
{
//...
});
You have to make one more change. The List&lt;T&gt; is not thread safe, so it will get corrupted if you call Add from multiple threads without synchronization. You can either add a lock (listTest) around each listTest.Add (a small sketch of this follows the snippet below), or replace it with a concurrent collection. My suggestion is to do the latter:
ConcurrentQueue<dynamic> listTest = new();
//...
listTest.Enqueue(jsonContent);
//...
return listTest.ToList();
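For reference, the lock-based alternative would just wrap every Add in a lock on the list itself (a minimal sketch, keeping the original List&lt;dynamic&gt;):
lock (listTest)
{
    listTest.Add(jsonContent);
}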
After making these changes, hopefully your code will still work correctly and run a bit faster. Then you'll have to experiment with the MaxDegreeOfParallelism setting until you find the value that yields the best performance. Don't go crazy with large values like 100 or 1000: in most cases over-parallelizing is harmful, and might yield worse performance than not parallelizing at all.
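Putting it together, the body of the paged loop could end up looking roughly like the sketch below. This is only a sketch: it assumes the Mapper, GetOrderForSyncing, url and httpClient members/variables from your existing code, reuses one HttpClient instead of creating one per page, and omits the order_date and PATCH handling for brevity.
ParallelOptions options = new() { MaxDegreeOfParallelism = 2 };
ConcurrentQueue<dynamic> listTest = new();
await Parallel.ForEachAsync(rows, options, async (row, ct) =>
{
    var testWork = (Model.WorkOrderCompleteInfo)Mapper.MapWkOrdersCompleteInfo(row);
    var orderIdDiv = await this.GetOrderForSyncing(httpClient, testWork.JobNo);
    var jsonTest = new JObject();
    jsonTest["task_code"] = testWork.JobTaskNo;
    jsonTest["description"] = testWork.JobTaskDescription;
    jsonTest["order"] = orderIdDiv.Id;
    jsonTest["progress"] = testWork.WkProgressPercentage;
    var content = new StringContent(jsonTest.ToString(), Encoding.UTF8, "application/json");
    var result = await httpClient.PostAsync(url, content, ct);
    var responseContent = await result.Content.ReadAsStringAsync(ct);
    // Same "already exists -> PATCH" decision logic as before goes here,
    // but every result is added to the thread-safe queue:
    listTest.Enqueue(JsonConvert.DeserializeObject<dynamic>(responseContent));
});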

Related

Aspnet core Parallels operation DbContext problem

I have a problem when I use parallel code in ASP.NET Core, in particular when, inside the loop, I try to save something to the database. I get my data from an external API and deserialize it into my class.
This is the parallel code:
Root players = JsonConvert.DeserializeObject<Root>(responseStream);
var bulkhead = Policy.BulkheadAsync(10, Int32.MaxValue);
var tasks = new List<Task>();
foreach (var player in players.players)
{
var t = bulkhead.ExecuteAsync(async () =>
{
int wyId = Convert.ToInt32(player.wyId);
HttpRequestMessage secondRequest = createRequest("https://apirest.com/v2/players/" + wyId + "?details=currentTeam&imageDataURL=true");
var client2 = _clientFactory.CreateClient();
var response2 = await client2.SendAsync(secondRequest);
if (response2.IsSuccessStatusCode)
{
var responseStream2 = await response2.Content.ReadAsStringAsync();
dynamic playerFullDetails = JsonConvert.DeserializeObject<dynamic>(responseStream2);
int wyId2 = Convert.ToInt32(playerFullDetails.wyId);
int marketValue = 0;
HttpRequestMessage thirdRequest = createRequest("https://apirest.com/v2/players/" + wyId2 + "/marketvalue");
var client3 = _clientFactory.CreateClient();
var response3 = await client3.SendAsync(thirdRequest);
if (response3.IsSuccessStatusCode)
{
var responseStream3 = await response3.Content.ReadAsStringAsync();
dynamic marketValueResponse = JsonConvert.DeserializeObject<dynamic>(responseStream3);
if (marketValueResponse.marketValue != 0)
{
marketValue = Convert.ToInt32(marketValueResponse.marketValue);
}
}
DateTime birthday = Convert.ToDateTime(playerFullDetails.birthDate);
int age = DateTime.Now.Year - birthday.Year;
Player finalPlayer = new Player();
finalPlayer.PlayerId = wyId2;
finalPlayer.MarketValue = marketValue;
finalPlayer.Value = Convert.ToDouble(marketValue) / Convert.ToDouble(1000000);
finalPlayer.Firstname = playerFullDetails.firstName;
finalPlayer.Lastname = playerFullDetails.lastName;
finalPlayer.Name = playerFullDetails.shortName;
finalPlayer.Position = playerFullDetails.role.name;
finalPlayer.Height = playerFullDetails.height;
finalPlayer.Foot = playerFullDetails.foot;
finalPlayer.IsLocked = false;
finalPlayer.Team = playerFullDetails.currentTeam != null ? playerFullDetails.currentTeam.name : "";
finalPlayer.TeamId = playerFullDetails.currentTeam != null ? playerFullDetails.currentTeam.wyId : 0;
finalPlayer.CompetitionId = 524;
finalPlayer.UpdatedDay = DateTime.Now;
finalPlayer.League = "Serie A";
finalPlayer.Age = age;
Player playerExist = await _context.Player.Where(x => x.PlayerId == wyId2).SingleOrDefaultAsync();
if (playerExist == null)
{
if (finalPlayer.TeamId != 0)
{
await _context.Player.AddAsync(finalPlayer);
await _context.SaveChangesAsync();
}
}
if (finalPlayer.TeamId != 0)
{
Team teamExist = await _context.Team.Where(x => x.TeamId == finalPlayer.TeamId).SingleOrDefaultAsync();
if (teamExist == null)
{
Team team = new Team();
team.TeamId = finalPlayer.TeamId;
team.TeamName = finalPlayer.Team;
await _context.Team.AddAsync(team);
await _context.SaveChangesAsync();
}
}
}
});
tasks.Add(t);
}
await Task.WhenAll(tasks);
The function inserts 50/60 elements into the db (out of roughly 500 in total) and then I receive this error:
A second operation was started on this context before a previous operation completed. This is usually caused by different threads concurrently using the same instance of DbContext. For more information on how to avoid threading issues with DbContext, see https://go.microsoft.com/fwlink/?linkid=2097913.
Thanks
It's best practice to use one DbContext per unit of work, and the DbContext is not thread safe.
So either create a new DbContext per thread/task, or serialize access with something like a semaphore; otherwise you will always get unstable code.
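A sketch of the first option, assuming EF Core 5+ and that the context is registered in DI: inject an IDbContextFactory<MyDbContext> (the context name here is hypothetical) and create a fresh context inside each bulkhead operation instead of sharing the injected _context:
// Registration, e.g. in Startup.ConfigureServices:
// services.AddDbContextFactory<MyDbContext>(o => o.UseSqlServer(connectionString));
private readonly IDbContextFactory<MyDbContext> _contextFactory;
var t = bulkhead.ExecuteAsync(async () =>
{
    // Every task gets its own context, so no two threads touch the same instance.
    await using var context = _contextFactory.CreateDbContext();
    // ... build finalPlayer exactly as before ...
    Player playerExist = await context.Player.Where(x => x.PlayerId == wyId2).SingleOrDefaultAsync();
    if (playerExist == null && finalPlayer.TeamId != 0)
    {
        context.Player.Add(finalPlayer);
        await context.SaveChangesAsync();
    }
    // ... same pattern for Team ...
});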

JSON Array to Entity Framework Core VERY Slow?

I'm working on a utility to read through a JSON file I've been given and to transform it into SQL Server. My weapon of choice is a .NET Core Console App (I'm trying to do all of my new work with .NET Core unless there is a compelling reason not to). I have the whole thing "working" but there is clearly a problem somewhere because the performance is truly horrifying almost to the point of being unusable.
The JSON file is approximately 27MB and contains a main array of 214 elements and each of those contains a couple of fields along with an array of from 150-350 records (that array has several fields and potentially a small <5 record array or two). Total records are approximately 35,000.
In the code below I've changed some names and stripped out a few of the fields to keep it more readable but all of the logic and code that does actual work is unchanged.
Keep in mind, I've done a lot of testing with the placement and number of calls to SaveChanges(), thinking initially that the number of trips to the Db was the problem. Although the version below is calling SaveChanges() once for each iteration of the 214-record loop, I've tried moving it outside of the entire looping structure and there is no discernible change in performance. In other words, with zero trips to the Db, this is still SLOW. How slow you ask, how does > 24 hours to run hit you? I'm willing to try anything at this point and am even considering moving the whole process into SQL Server but would much rather work in C# than TSQL.
static void Main(string[] args)
{
string statusMsg = String.Empty;
JArray sets = JArray.Parse(File.ReadAllText(@"C:\Users\Public\Downloads\ImportFile.json"));
try
{
using (var _db = new WidgetDb())
{
for (int s = 0; s < sets.Count; s++)
{
Console.WriteLine($"{s.ToString()}: {sets[s]["name"]}");
// First we create the Set
Set eSet = new Set()
{
SetCode = (string)sets[s]["code"],
SetName = (string)sets[s]["name"],
Type = (string)sets[s]["type"],
Block = (string)sets[s]["block"] ?? ""
};
_db.Entry(eSet).State = Microsoft.EntityFrameworkCore.EntityState.Added;
JArray widgets = sets[s]["widgets"].ToObject<JArray>();
for (int c = 0; c < widgets.Count; c++)
{
Widget eWidget = new Widget()
{
WidgetId = (string)widgets[c]["id"],
Layout = (string)widgets[c]["layout"] ?? "",
WidgetName = (string)widgets[c]["name"],
WidgetNames = "",
ReleaseDate = releaseDate,
SetCode = (string)sets[s]["code"]
};
// WidgetColors
if (widgets[c]["colors"] != null)
{
JArray widgetColors = widgets[c]["colors"].ToObject<JArray>();
for (int cc = 0; cc < widgetColors.Count; cc++)
{
WidgetColor eWidgetColor = new WidgetColor()
{
WidgetId = eWidget.WidgetId,
Color = (string)widgets[c]["colors"][cc]
};
_db.Entry(eWidgetColor).State = Microsoft.EntityFrameworkCore.EntityState.Added;
}
}
// WidgetTypes
if (widgets[c]["types"] != null)
{
JArray widgetTypes = widgets[c]["types"].ToObject<JArray>();
for (int ct = 0; ct < widgetTypes.Count; ct++)
{
WidgetType eWidgetType = new WidgetType()
{
WidgetId = eWidget.WidgetId,
Type = (string)widgets[c]["types"][ct]
};
_db.Entry(eWidgetType).State = Microsoft.EntityFrameworkCore.EntityState.Added;
}
}
// WidgetVariations
if (widgets[c]["variations"] != null)
{
JArray widgetVariations = widgets[c]["variations"].ToObject<JArray>();
for (int cv = 0; cv < widgetVariations.Count; cv++)
{
WidgetVariation eWidgetVariation = new WidgetVariation()
{
WidgetId = eWidget.WidgetId,
Variation = (string)widgets[c]["variations"][cv]
};
_db.Entry(eWidgetVariation).State = Microsoft.EntityFrameworkCore.EntityState.Added;
}
}
}
_db.SaveChanges();
}
}
statusMsg = "Import Complete";
}
catch (Exception ex)
{
statusMsg = ex.Message + " (" + ex.InnerException + ")";
}
Console.WriteLine(statusMsg);
Console.ReadKey();
}
I had an issue with that kind of code, lots of loops and tons of changing state.
Any change/manipulation you make in the _db context will be tracked, and that tracking makes your context slower with each addition. Read more here.
The fix for me was to create a new EF context (_db) at some key points. It saved me a few hours per run!
You could try to create a new instance of _db on each iteration of the outer loop (the one over the main array of 214 elements).
If it makes no difference, try adding some stopwatches to get a better idea of what/where is taking so long.
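A sketch of that idea, using the WidgetDb context name from the question (disposing and recreating the context keeps the change tracker from accumulating all ~35,000 records):
var _db = new WidgetDb();
// Optionally also set _db.ChangeTracker.AutoDetectChangesEnabled = false for pure inserts.
try
{
    for (int s = 0; s < sets.Count; s++)
    {
        // ... build and Add the Set, Widgets, colors, types and variations as before ...
        _db.SaveChanges();
        // Throw the old context away and start fresh for the next set.
        _db.Dispose();
        _db = new WidgetDb();
    }
}
finally
{
    _db.Dispose();
}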
If you're making thousands of updates then EF is not really the way to go. Something like SqlBulkCopy will do the trick.
You could try the BulkWriter library.
IEnumerable<string> ReadFile(string path)
{
using (var stream = File.OpenRead(path))
using (var reader = new StreamReader(stream))
{
while (reader.Peek() >= 0)
{
yield return reader.ReadLine();
}
}
}
var items =
from line in ReadFile(@"C:\products.csv")
let values = line.Split(',')
select new Product {Sku = values[0], Name = values[1]};
then
using (var bulkWriter = new BulkWriter<Product>(connectionString)) {
bulkWriter.WriteToDatabase(items);
}
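If you would rather use SqlBulkCopy directly, as mentioned above, a minimal sketch could look like this; the table and column names are only illustrative:
using System.Data;
using System.Data.SqlClient;

static void BulkInsertWidgets(string connectionString, DataTable widgets)
{
    // The DataTable's columns must line up with the destination table's columns.
    using (var bulk = new SqlBulkCopy(connectionString))
    {
        bulk.DestinationTableName = "dbo.Widget";
        bulk.BatchSize = 5000;
        bulk.ColumnMappings.Add("WidgetId", "WidgetId");
        bulk.ColumnMappings.Add("WidgetName", "WidgetName");
        bulk.WriteToServer(widgets);
    }
}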

Making several similar specific methods generic

I have a method like so...
static async Task GetLeads(ForceClient client)
{
Console.WriteLine("Get Leads");
var accts = new List<Lead>();
var results = await client.QueryAsync<Lead>(Lead._select);
var totalSize = results.TotalSize;
Console.WriteLine("Queried " + totalSize + " leads.");
accts.AddRange(results.Records);
Console.WriteLine("Added " + results.Records.Count + " leads...");
var nextRecordsUrl = results.NextRecordsUrl;
if (!string.IsNullOrEmpty(nextRecordsUrl))
{
Console.WriteLine("Found more records...");
while (true)
{
var continuationResults = await client.QueryContinuationAsync<Lead>(nextRecordsUrl);
Console.WriteLine("Queried an additional " + continuationResults.Records.Count + " leads.");
accts.AddRange(continuationResults.Records);
if (string.IsNullOrEmpty(continuationResults.NextRecordsUrl)) break;
nextRecordsUrl = continuationResults.NextRecordsUrl;
}
}
Upsert(accts, Lead.target);
}
I have another method like so..
static async Task GetSupplierProducts(ForceClient client)
{
Console.WriteLine("Get SupplierProduct");
var accts = new List<SupplierProduct>();
var results = await client.QueryAsync<SupplierProduct>(SupplierProduct._select);
var totalSize = results.TotalSize;
Console.WriteLine("Queried " + totalSize + " SupplierProduct.");
accts.AddRange(results.Records);
Console.WriteLine("Added " + results.Records.Count + " SupplierProduct...");
var nextRecordsUrl = results.NextRecordsUrl;
if (!string.IsNullOrEmpty(nextRecordsUrl))
{
Console.WriteLine("Found more records...");
while (true)
{
var continuationResults = await client.QueryContinuationAsync<SupplierProduct>(nextRecordsUrl);
Console.WriteLine("Queried an additional " + continuationResults.Records.Count + " SupplierProduct.");
accts.AddRange(continuationResults.Records);
if (string.IsNullOrEmpty(continuationResults.NextRecordsUrl)) break;
nextRecordsUrl = continuationResults.NextRecordsUrl;
}
}
Upsert(accts, SupplierProduct.target);
}
How can I make a method that abstracts this behavior generically?
The types Lead and SupplierProduct must somehow be related, in the sense that they either implement the same interface or inherit from the same base class, to make this work. The shared piece appears to be _select, which is a static member and therefore cannot be included in an interface. Furthermore, the generation of human-readable strings would have to be refactored into the types.
If Base were the hypothetical base class, the signature of the generic function would have to be as follows:
static async Task Get<T>(ForceClient client) where T : Base
Assuming both Lead and SupplierProduct inherit from the same Parent:
static async Task GetMyInstance<T>(ForceClient client) where T : Parent
{
Console.WriteLine("Get " + T.GetType().Name);
var accts = new List<T>();
var results = await client.QueryAsync<T>(T._select);
var totalSize = results.TotalSize;
Console.WriteLine("Queried " + totalSize + " " + T.GetType().Name +".");
accts.AddRange(results.Records);
Console.WriteLine("Added " + results.Records.Count + T.GetType().Name + "...");
var nextRecordsUrl = results.NextRecordsUrl;
if (!string.IsNullOrEmpty(nextRecordsUrl))
{
Console.WriteLine("Found more records...");
while (true)
{
var continuationResults = await client.QueryContinuationAsync<T>(nextRecordsUrl);
Console.WriteLine("Queried an additional " + continuationResults.Records.Count + " " + T.GetType().Name + ".");
accts.AddRange(continuationResults.Records);
if (string.IsNullOrEmpty(continuationResults.NextRecordsUrl)) break;
nextRecordsUrl = continuationResults.NextRecordsUrl;
}
}
Upsert(accts, T.target);
}
Please note that _select and target must be reachable through the type parameter for this to work; a plain static member on a base class cannot be accessed as T._select, so in practice you would need something like C# 11 static abstract interface members, or pass those values in as parameters.
And you call it like this:
var foo = GetMyInstance<Lead>(forceClient);
or
var foo = GetMyInstance<SupplierProduct>(forceClient);
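Another option that sidesteps the static-member restriction entirely is to pass the per-type values in as parameters instead of constraining T. A sketch, reusing the QueryAsync, QueryContinuationAsync and Upsert calls from the question (the parameter types for select and target are assumptions):
static async Task Get<T>(ForceClient client, string select, string target, string label)
{
    Console.WriteLine("Get " + label);
    var records = new List<T>();
    var results = await client.QueryAsync<T>(select);
    Console.WriteLine("Queried " + results.TotalSize + " " + label + ".");
    records.AddRange(results.Records);
    var nextRecordsUrl = results.NextRecordsUrl;
    while (!string.IsNullOrEmpty(nextRecordsUrl))
    {
        Console.WriteLine("Found more records...");
        var continuationResults = await client.QueryContinuationAsync<T>(nextRecordsUrl);
        Console.WriteLine("Queried an additional " + continuationResults.Records.Count + " " + label + ".");
        records.AddRange(continuationResults.Records);
        nextRecordsUrl = continuationResults.NextRecordsUrl;
    }
    Upsert(records, target);
}
// Called as:
// await Get<Lead>(client, Lead._select, Lead.target, "leads");
// await Get<SupplierProduct>(client, SupplierProduct._select, SupplierProduct.target, "SupplierProduct");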
This likely won't suffice fully, but when I want to make multiple partially similar methods accessible through a single function, I tend to pass target strings and use a switch statement on them.
This way we can also run multiple actions one after another by supplying an array of targets.
Note: this code isn't debugged or anything; I just wanted to point you towards how switch statements might be of some use to you. It wasn't possible for me to give a more comprehensive answer because I can't fully understand the intent of your code.
static async Task GetRecordsFor(ForceClient client, string[] targets )
{
foreach (string target in targets){
switch ( target )
{
case "leads":
Console.WriteLine("Get Leads");
var accts = new List<Lead>();
// more specific code for fetching leads
break;
case "suppliers":
Console.WriteLine("Get SupplierProduct");
var accts = new List<SupplierProduct>();
// more specific code for fetching suppliers
break;
}
// Actions you want to perform on each of these.
accts.AddRange(continuationResults.Records);
}
}

AWAIT multiple file downloads with DownloadDataAsync

I have a zip file creator that takes in a String[] of Urls, and returns a zip file with all of the files in the String[]
I figured there would be a number of examples of this, but I cannot seem to find an answer to "How to download many files asynchronously and return when done".
How do I download {n} files at once, and return the Dictionary only when all downloads are complete?
private static Dictionary<string, byte[]> ReturnedFileData(IEnumerable<string> urlList)
{
var returnList = new Dictionary<string, byte[]>();
using (var client = new WebClient())
{
foreach (var url in urlList)
{
client.DownloadDataCompleted += (sender1, e1) => returnList.Add(GetFileNameFromUrlString(url), e1.Result);
client.DownloadDataAsync(new Uri(url));
}
}
return returnList;
}
private static string GetFileNameFromUrlString(string url)
{
var uri = new Uri(url);
return System.IO.Path.GetFileName(uri.LocalPath);
}
First, you tagged your question with async-await without actually using it. There really is no reason anymore to use the old asynchronous paradigms.
To wait asynchronously for all concurrent async operations to complete you should use Task.WhenAll, which means that you need to keep all the tasks in some construct (e.g. a dictionary) before actually extracting their results.
At the end, when you have all the results in hand, you just create the new result dictionary by parsing the URI into the file name and extracting the result out of the async tasks.
async Task<Dictionary<string, byte[]>> ReturnFileData(IEnumerable<string> urls)
{
var dictionary = urls.ToDictionary(
url => new Uri(url),
url => new WebClient().DownloadDataTaskAsync(url));
await Task.WhenAll(dictionary.Values);
return dictionary.ToDictionary(
pair => Path.GetFileName(pair.Key.LocalPath),
pair => pair.Value.Result);
}
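For what it's worth, on current .NET, where WebClient is effectively deprecated, the same pattern with HttpClient would look roughly like this:
async Task<Dictionary<string, byte[]>> ReturnFileData(HttpClient client, IEnumerable<string> urls)
{
    var tasks = urls.ToDictionary(
        url => new Uri(url),
        url => client.GetByteArrayAsync(url));
    await Task.WhenAll(tasks.Values);
    return tasks.ToDictionary(
        pair => Path.GetFileName(pair.Key.LocalPath),
        pair => pair.Value.Result);
}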
public string JUST_return_dataURL_by_URL(string URL, int interval, int max_interval)
{
var client = new WebClient { Proxy = proxy };
client.Headers = _headers;
string downloaded_from_URL = "false"; //default - until downloading
client.DownloadDataCompleted += (sender, e) =>
{
Console.WriteLine("Done!");
string dataURL = Convert.ToBase64String(e.Result);
string filename = Guid.NewGuid().ToString().Trim('{', '}')+".png";
downloaded_from_URL =
"Image Downloaded from " + URL
+ "<br>"
+ "<a href=\"data:image/png;base64," + dataURL + "\" download=\"" + filename + "\">"
+ "<img src=\"data:image/png;base64," + dataURL + "\"/>" + filename
+ "</a>"
;
return;
};
client.DownloadDataAsync(new System.Uri(URL));
int i = 0;
do{
// Console.WriteLine(
// "(interval > 10): "+(interval > 10)
// +"\n(downloaded_from_URL == \"false\"): " + (downloaded_from_URL == "false")
// +"\ninterval: "+interval
// );
Thread.Sleep(interval);
i+=interval;
}
while( (downloaded_from_URL == "false") && (i < max_interval) );
return downloaded_from_URL;
}
You'd be wanting the Task.WaitAll method...
msdn link
Create each download as a separate task, then pass them as a collection.
A shortcut to this might be to wrap your download method in a task.
return new Task<DownloadResult>(() => { /* method body */ });
Apologies for vagueness, working on iPad sucks for coding.
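In other words, something along these lines (a rough sketch, reusing GetFileNameFromUrlString from the question):
var tasks = urlList
    .Select(url => Task.Run(() => new KeyValuePair<string, byte[]>(
        GetFileNameFromUrlString(url),
        new WebClient().DownloadData(url))))
    .ToArray();
Task.WaitAll(tasks);
var returnList = tasks.ToDictionary(t => t.Result.Key, t => t.Result.Value);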
EDIT:
Another implementation of this that may be worth considering is wrapping the downloads using the parallel framework.
Since your tasks all do the same thing taking a parameter, you could instead use Parallel.ForEach and wrap that into a single task:
public System.Threading.Tasks.Task<System.Collections.Generic.IDictionary<string, byte[]>> DownloadTask(System.Collections.Generic.IEnumerable<string> urlList)
{
return System.Threading.Tasks.Task.Run<System.Collections.Generic.IDictionary<string, byte[]>>(() =>
{
var r = new System.Collections.Concurrent.ConcurrentDictionary<string, byte[]>();
System.Threading.Tasks.Parallel.ForEach<string>(urlList, (url, s, l) =>
{
using (System.Net.WebClient client = new System.Net.WebClient())
{
var bytedata = client.DownloadData(url);
r.TryAdd(url, bytedata);
}
});
var results = new System.Collections.Generic.Dictionary<string, byte[]>();
foreach (var value in r)
{
results.Add(value.Key, value.Value);
}
return results;
});
}
This leverages a concurrent collection to support parallel access within the method before converting back to IDictionary.
This method returns a task so can be called with an await.
Hope this provides a helpful alternative.
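Usage, given the method above, would then simply be:
var fileData = await DownloadTask(urlList);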

C# false http response

I have a thread that returns a site's HTTP response status, but sometimes my program returns false results, and after a while it gives good results.
False result:
It takes a big amount of time to check, and then it says that (for example) Google is down, which is not reasonable, but after a few seconds it returns good results.
Can you take a look and tell me what's wrong, or how I can improve it?
Checks all sites in datagrid:
private void CheckSites()
{
if (CheckSelected())
{
int rowCount = dataGrid.BindingContext[dataGrid.DataSource, dataGrid.DataMember].Count;
string url;
for (int i = 0; i < rowCount; i++)
{
url = dataGrid.Rows[i].Cells[2].Value.ToString();
if (url != null)
{
Task<string[]> task = Task.Factory.StartNew<string[]>
(() => checkSite(url));
// We can do other work here and it will execute in parallel:
//Loading...
// When we need the task's return value, we query its Result property:
// If it's still executing, the current thread will now block (wait)
// until the task finishes:
string[] result = task.Result;
selectRows();
if (result[0] != System.Net.HttpStatusCode.OK.ToString() && result[0] != System.Net.HttpStatusCode.Found.ToString() && result[0] != System.Net.HttpStatusCode.MovedPermanently.ToString())
{
//bad
notifyIcon1.ShowBalloonTip(5000, "Site Down", dataGrid.Rows[i].Cells[2].Value.ToString() + ", has a status code of:" + result, ToolTipIcon.Error);
dataGrid.Rows[i].DefaultCellStyle.BackColor = System.Drawing.Color.Wheat;
TimeSpan ts;
TimeSpan timeTaken = TimeSpan.Parse(result[1]);
dataGrid.Rows[i].Cells[3].Value = result[0];
dataGrid.Rows[i].Cells[3].Style.BackColor = System.Drawing.Color.Red;
dataGrid.Rows[i].Cells[4].Value = timeTaken.Seconds.ToString() + "." + String.Format("{0:0.00000}", timeTaken.Milliseconds.ToString()) + " seconds.";
string sec = (DateTime.Now.Second < 10) ? "0" + DateTime.Now.Second.ToString() : DateTime.Now.Second.ToString();
string min = (DateTime.Now.Minute < 10) ? "0" + DateTime.Now.Minute.ToString() : DateTime.Now.Minute.ToString();
string hour = (DateTime.Now.Hour < 10) ? "0" + DateTime.Now.Hour.ToString() : DateTime.Now.Hour.ToString();
dataGrid.Rows[i].Cells[5].Value = hour + ":" + min + ":" + sec;
//loadbar
}
else if (result[0] == "catch")//catch
{
notifyIcon1.ShowBalloonTip(10000, "SITE DOWN", dataGrid.Rows[i].Cells[1].Value.ToString() + ", Error:" +result[1], ToolTipIcon.Error);
dataGrid.Rows[i].Cells[3].Value = result[1];
dataGrid.Rows[i].Cells[3].Style.BackColor = System.Drawing.Color.Red;
//loadbar
}
else
{
//good
TimeSpan timeTaken = TimeSpan.Parse(result[1]);
dataGrid.Rows[i].Cells[3].Value = result[0];
dataGrid.Rows[i].Cells[3].Style.BackColor = System.Drawing.Color.LightGreen;
dataGrid.Rows[i].Cells[4].Value = timeTaken.Seconds.ToString() + "." + String.Format("{0:0.00000}", timeTaken.Milliseconds.ToString()) + " seconds.";
string sec = (DateTime.Now.Second < 10) ? "0" + DateTime.Now.Second.ToString() : DateTime.Now.Second.ToString();
string min = (DateTime.Now.Minute < 10) ? "0" + DateTime.Now.Minute.ToString() : DateTime.Now.Minute.ToString();
string hour = (DateTime.Now.Hour < 10) ? "0" + DateTime.Now.Hour.ToString() : DateTime.Now.Hour.ToString();
dataGrid.Rows[i].Cells[5].Value = hour + ":" + min + ":" + sec;
//loadbar
}
selectRows();
}
}
}
}
Checks a site:
/////////////////////////////////
////Check datagrid websites-button - returns response
/////////////////////////////////
private string[] checkSite(string url)
{
string[] response = new string[2];
url = dataGrid.Rows[0].Cells[2].Value.ToString();
if (url != null)
{
try
{
HttpWebRequest httpReq = (HttpWebRequest)WebRequest.Create(url);
httpReq.Timeout = 10000;
//loadbar
dataGrid.Rows[0].DefaultCellStyle.BackColor = System.Drawing.Color.Wheat;
System.Diagnostics.Stopwatch timer = new System.Diagnostics.Stopwatch();
timer.Start();
HttpWebResponse httpRes = (HttpWebResponse)httpReq.GetResponse(); //httpRes.Close();
timer.Stop();
//loadbar
HttpStatusCode httpStatus = httpRes.StatusCode;
response[0] = httpStatus.ToString();
response[1] = timer.Elapsed.ToString();//*
httpRes.Close();
return response;
}
catch (Exception he)
{
response[0] = "catch";
response[1] = he.Message;
return response;
}
}
response[0] = "catch";
response[1] = "No URL entered";
return response;
//dataGrid.Rows[i].DefaultCellStyle.BackColor = System.Drawing.Color.Blue;
}
Thanks in advance.
Assuming the code provided is the actual code used:
First of all, your definition of 'False result' and 'Good result' is wrong. If you expect A but get B, that doesn't mean B is invalid. If your wife is giving birth and you expect a boy but it turns out to be a girl, it's not a false result. Just unexpected.
That said, let's analyze your work: if it takes a long, long time to check a site only to finally get a ??? result which isn't a 200 response code, we can almost safely assume you are dealing with a timeout. If your router, Google or any fundamental network device in between is having problems, it's expected to get an unexpected answer: "Timeout", "Bad Request", "Server not available" etc. Why would this happen? It's impossible to say for certain without having direct access to your environment.
Looking at your code however, I see that you're using the default TaskScheduler for making each check run as a task in the background (assuming you haven't changed the default task scheduler, which would be a very bad practice to begin with). The default task scheduler schedules each task on the thread pool, which results in many, many tasks running simultaneously. Here we have a good candidate for overloading your network. Many sites (especially Google) are somewhat sensitive to handling many requests from the same source (especially if the frequency is high), so maybe Google is blocking you temporarily or holding you back. Again, at this point it's pure speculation, but the fact that you're running all checks simultaneously (unless the thread pool is at its max) is very likely the cause of your problem.
UPDATE
I would recommend working with a LimitedConcurrencyLevelTaskScheduler (see here: http://blogs.msdn.com/b/pfxteam/archive/2010/04/09/9990424.aspx). With it you can limit the number of tasks that can run concurrently. You have to do some testing to find what number works best in your situation. Also make sure that the frequency is not 'too' high. It's hard to define what is too high; only testing can prove that.
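A minimal sketch of that approach, assuming the LimitedConcurrencyLevelTaskScheduler class from the linked article has been added to the project:
// Run at most 4 site checks at a time instead of flooding the thread pool.
var scheduler = new LimitedConcurrencyLevelTaskScheduler(4);
var factory = new TaskFactory(scheduler);
var task = factory.StartNew(() => checkSite(url));
// ...collect the tasks and use Task.WaitAll / task.Result as before...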
In order to simulate your scenario, I have created a WinForms form with a data grid and a button. On load of the form, I programmatically create a list of URLs (in a table) and bind it to the data grid. On button click, we start the download process. In short, you have to write more defensive code, and the following is only a skeleton of how you can fix the issue.
using System;
using System.Data;
using System.Net;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace app
{
public partial class Form1 : Form
{
DataTable urls = new DataTable();
public Form1()
{
InitializeComponent();
}
//Fill your uri's and bind to a data grid.
void InitTable()
{
//Silly logic to simulate your scenario.
urls = new DataTable();
urls.Columns.Add(new DataColumn("Srl", typeof(string)));
urls.Columns.Add(new DataColumn("Urls", typeof(Uri)));
urls.Columns.Add(new DataColumn("Result", typeof(string)));
DataRow dr = urls.NewRow();
dr["Srl"] = "1";
dr["Urls"] = new Uri("http://www.microsoft.com");
dr["Result"] = string.Empty;
urls.Rows.Add(dr);
dr = urls.NewRow();
dr["Srl"] = "2";
dr["Urls"] = new Uri("http://www.google.com");
dr["Result"] = string.Empty;
urls.Rows.Add(dr);
dr = urls.NewRow();
dr["Srl"] = "3";
dr["Urls"] = new Uri("http://www.stackoverflow.com");
dr["Result"] = string.Empty;
urls.Rows.Add(dr);
urls.AcceptChanges();
}
void UpdateResult()
{
dataGridView1.DataSource = urls;
}
//Important:
// This example will freeze the UI. You can avoid this by using a
//background worker or a pool with some event synchronization. I haven't covered that area since
//we are addressing a different issue. Let me know if you would like to address the UI freeze
//issue, or you can do it yourself.
private void button1_Click(object sender, EventArgs e)
{
//Create array for Task to parallelize multiple download.
var tasks = new Task<string[]>[urls.Rows.Count];
//Initialize those task based on number of Uri's
for(int i=0;i<urls.Rows.Count;i++)
{
int index = i;//Do not change this: it copies the loop variable so the lambda below captures this iteration's value
//Assign responsibility and start task.
tasks[index] = new Task<string[]>(
() => checkSite(
new TaskInput(urls.Rows[index]["Urls"].ToString(), urls.Rows[index]["Srl"].ToString())));
tasks[index].Start();
}
//Wait for all task to complete. Check other overloaded if interested.
Task.WaitAll(tasks);
//block shows how to access result from task
foreach (var item in tasks)
{
DataRow[] rows=urls.Select("Srl='"+item.Result[2]+"'");
foreach (var row in rows)
row["Result"]=item.Result[0]+"|"+item.Result[1];
}
UpdateResult();
}
//This is dummy method which in your case 'Check Site'. You can have your own
string[] checkSite(TaskInput input)
{
string[] response = new string[3];
if (input != null)
{
try
{
WebResponse wResponse = WebRequest.Create(input.Url).GetResponse();
response[0] = wResponse.ContentLength.ToString();
response[1] = wResponse.ContentType;
response[2] = input.Srl;
return response;
}
catch (Exception he)
{
response[0] = "catch";
response[1] = he.Message;
response[2] = input.Srl;
return response;
}
}
response[0] = "catch";
response[1] = "No URL entered";
response[2] = input.Srl;
return response;
}
private void Form1_Load(object sender, EventArgs e)
{
InitTable();
UpdateResult();
}
}
//Supply custom object for simplicity
public class TaskInput
{
public TaskInput(){}
public TaskInput(string url, string srl)
{
Url = url;
Srl = srl;
}
public string Srl { get; set; }
public string Url { get; set; }
}
}
