Multithreaded c# console app to scrape data from sites - c#

I have written an app that goes through our own properties and scraps the data. To make sure I don't run through the same URLs, I am using a MySQL database to store the URL, flag it once its processed. All this was being done in a single thread and it's fine if I had only few thousand entries. But I have few hundred thousand entries that I need to parse so I need to make changes in the code (I am newbie in multithreading in general). I found an example and was trying to copy the style but doesn't seem to work. Anyone know what the issue is with the following code?
EDIT: Sorry didn't mean to make people guess the issue but was stupid of me to include the exception. Here is the exception
"System.InValidCastException: 'Specified cast is not valid.'"
When I start the process it collects the URLs from the database and then never hits DoWork method
//This will get the entries from the database
List<Mappings> items = bot.GetUrlsToProcess(100);
if (items != null)
{
var tokenSource = new CancellationTokenSource();
var token = tokenSource.Token;
Worker.Done = new Worker.DoneDelegate(WorkerDone);
foreach (var item in items)
{
urls.Add(item.Url);
WaitingTasks.Enqueue(new Task(id => new Worker().DoWork((int)id, item.Url, token), item.Url, token));
}
LaunchTasks();
}
static async void LaunchTasks()
{
// keep checking until we're done
while ((WaitingTasks.Count > 0) || (RunningTasks.Count > 0))
{
// launch tasks when there's room
while ((WaitingTasks.Count > 0) && (RunningTasks.Count < MaxRunningTasks))
{
Task task = WaitingTasks.Dequeue();
lock (RunningTasks) RunningTasks.Add((int)task.AsyncState, task);
task.Start();
}
UpdateConsole();
await Task.Delay(300); // wait before checking again
}
UpdateConsole(); // all done
}
static void UpdateConsole()
{
Console.Write(string.Format("\rwaiting: {0,3:##0} running: {1,3:##0} ", WaitingTasks.Count, RunningTasks.Count));
}
static void WorkerDone(int id)
{
lock (RunningTasks) RunningTasks.Remove(id);
}
public class Worker
{
public delegate void DoneDelegate(int taskId);
public static DoneDelegate Done { private get; set; }
public async void DoWork(object id, string url, CancellationToken token)
{
if (token.IsCancellationRequested) return;
Content obj;
try
{
int tries = 0;
bool IsUrlProcessed = true;
DateTime dtStart = DateTime.Now;
string articleDate = string.Empty;
try
{
ScrapeWeb bot = new ScrapeWeb();
SearchApi searchApi = new SearchApi();
SearchHits searchHits = searchApi.Url(url, 5, 0);
if (searchHits.Hits.Count() == 0)
{
obj = await bot.ReturnArticleObject(url);
if (obj.Code != HttpStatusCode.OK)
{
Console.WriteLine(string.Format("\r Status is {0}", obj.Code));
tries = itemfound.UrlMaxTries + 1;
IsUrlProcessed = false;
itemfound.HttpCode = obj.Code;
}
else
{
string title = obj.Title;
string content = obj.Contents;
string description = obj.Description;
Articles article = new Articles();
article.Site = url.GetSite();
article.Content = content;
article.Title = title;
article.Url = url.ToLower();
article.Description = description;
string strThumbNail = HtmlHelper.GetImageUrl(url, obj.RawResponse);
article.Author = HtmlHelper.GetAuthor(url, obj.RawResponse);
if (!string.IsNullOrEmpty(strThumbNail))
{
//This condition needs to be added to remove ?n=<number> from EP thumbnails
if (strThumbNail.Contains("?"))
{
article.ImageUrl = strThumbNail.Substring(0, strThumbNail.IndexOf("?")).Replace("http:", "https:");
}
else
article.ImageUrl = strThumbNail.Replace("http:", "https:");
}
else
{
article.ImageUrl = string.IsNullOrEmpty(strThumbNail) ? article.Url.GetDefaultImageUrls() : strThumbNail.Replace("http:", "https:");
}
articleDate = HtmlHelper.GetPublishDate(url, obj.RawResponse);
if (string.IsNullOrEmpty(articleDate))
article.Pubdate = DateTime.Now;
else
article.Pubdate = DateTime.Parse(articleDate);
var client = new Index(searchApi);
var result = client.Upsert(article);
itemfound.HttpCode = obj.Code;
if (result)
{
itemfound.DateCreated = DateTime.Parse(articleDate);
itemfound.DateModified = DateTime.Parse(articleDate);
UpdateItem(itemfound);
}
else
{
tries = itemfound.UrlMaxTries + 1;
IsUrlProcessed = false;
itemfound.DateCreated = DateTime.Parse(articleDate);
itemfound.DateModified = DateTime.Parse(articleDate) == null ? DateTime.Now : DateTime.Parse(articleDate);
UpdateItem(itemfound, tries, IsUrlProcessed);
}
}
}
else
{
tries = itemfound.UrlMaxTries + 1;
IsUrlProcessed = true;
itemfound.HttpCode = HttpStatusCode.OK;
itemfound.DateCreated = DateTime.Parse(articleDate);
itemfound.DateModified = DateTime.Parse(articleDate) == null ? DateTime.Now : DateTime.Parse(articleDate);
}
}
catch (Exception e)
{
tries = itemfound.UrlMaxTries + 1;
IsUrlProcessed = false;
itemfound.DateCreated = DateTime.Parse(articleDate);
itemfound.DateModified = DateTime.Parse(articleDate) == null ? DateTime.Now : DateTime.Parse(articleDate);
}
finally
{
DateTime dtEnd = DateTime.Now;
Console.WriteLine(string.Format("\r Total time taken to process items is {0}", (dtEnd - dtStart).TotalSeconds));
}
}
catch (Exception e)
{
Console.WriteLine(e);
}
Done((int)id);
}
}
All this code is based from Best multi-thread approach for multiple web requests this link. Can someone tell me how to get this approach running?

I think the problem is in the way you're creating your tasks:
new Task(id => new Worker().DoWork((int)id, item.Url, token), item.Url, token)
This Task constructor overload expected Action<object> delegate. That means id will be typed as object and you need to cast it back to something useful first.
Parameters
action
Type: System.Action<Object>
The delegate that represents the code to execute in the task.
state
Type: System.Object
An object representing data to be used by the action.
cancellationToken
Type: System.Threading.CancellationToken
-The CancellationToken that that the new task will observe.
You decided to cast it to int by calling (int)id, but you're passing item.Url as the object itself. I can't tell you 100% what the type of Url is but I don't expect Url-named property to be of type int.

Based on what #MarcinJuraszek said I just went back to my code and added an int as I couldn't find another way to resolve it. Here is the change I made
int i=0
foreach (var item in items)
{
urls.Add(item.Url);
WaitingTasks.Enqueue(new Task(id => new Worker().DoWork((string)id, item.Url, token), item.Url, token));
i++;
}

Related

Running a thread in the background when controller return to UI with full results - async call of a function from Controller method

Hi I have Controller method as below
[HttpPost]
public JsonResult Post(string vehiclesString, string Entity, int EntityId, ApplicationUser CurrentUser)
{
//https://stackify.com/understanding-asp-net-performance-for-reading-incoming-data/
List<Vehicle> vehicles = Newtonsoft.Json.JsonConvert.DeserializeObject<List<Vehicle>>(vehiclesString);
InputFieldController c = new InputFieldController();
var errors = new List<string>();
try
{
//let's get model for each of the input field
InputFieldController icController = new InputFieldController();
List<InputField> fields = icController.GetNamesValues("VehicleField", -1, "Vehicle", 0);
foreach (Vehicle vehicle in vehicles)
{
//convert array of strings into array of input fields
if (fields.Count != vehicle.ValueStrings.Count)
{
throw new Exception("Vehicle columns mismatch. Expected "
+ fields.Count + " fields, but received " + vehicle.ValueStrings.Count);
}
for (int i = 0; i < fields.Count; i++)
{
InputField field = fields[i];
string cell = vehicle.ValueStrings[i];
if ((cell != null || cell != String.Empty) && (field.Type == "radio" || field.Type == "dropdown"))
{
var f = field.InputDropdowns.Where(x => x.Name == cell).FirstOrDefault();
if (f != null)
{
field.InputValue.InputDropdownId = f.InputDropdownId;
}
else
field.InputValue.InputDropdownId = null;
}
else
{
field.InputValue.Value = cell;
}
vehicle.Values.Add(field);
}
vehicle.Blob = Newtonsoft.Json.JsonConvert.SerializeObject(vehicle.Values);
Vehicle v = new Vehicle();
if (vehicle.VehicleId == 0)
{
v = this.DomainLogicUnitOfWork.VehicleManager.Create(vehicle, Entity, EntityId);
}
}
JsonResult data = Json(new
{
success = true,
});
List<Vehicle> vehiclesList = this.DomainLogicUnitOfWork.VehicleManager.List(Entity, EntityId);
if (vehiclesList != null)
foreach (Vehicle v in vehiclesList)
{
if ((v != null) && (v.Blob != null))
v.Values = Newtonsoft.Json.JsonConvert.DeserializeObject<List<InputField>>(v.Blob);
}
//Task task = Task.Run(async () => await this.DomainLogicUnitOfWork.VehicleInfoManager.CreateOrUpdate(Entity, EntityId));
/*
* Here I have to call the this.DomainLogicUnitOfWork.VehicleInfoManager.CreateOrUpdate(string Entity, int EntityId) asynchronously
* but return the data without waiting for the CreateOrUpdate to complete
*/
System.Web.Hosting.HostingEnvironment.QueueBackgroundWorkItem(async cancellationToken =>
{
await this.DomainLogicUnitOfWork.VehicleInfoManager.CreateOrUpdate(vehiclesList, Entity, EntityId);
});
return data;
}
catch (Exception ex)
{
LogHandler.LogError(9000, "Error updating input fields", ex);
errors.Add("Error 9000:" + ex.Message);
return Json(new
{
error = ex.Message
});
}
}
And I have CreateOrUpdate method defined as below in VehicleInfoManager class
public async Task CreateOrUpdate(string Entity, int EntityId)
{
//do some stuff
var task = Task.Run(() => Test(Entity, EntityId));
//do other stuff
await task;
//some more stuff
}
And Test method is as follows
private void Test(string Entity, int EntityId)
{
List<VehicleInfo> addList; List<VehicleInfo> updateList;
try
{
this.GetAddAndUpdateList(Entity, EntityId, out addList, out updateList);
if ((addList != null) && (addList.Count > 0))
using (var cont = this.UnitOfWork.Context)
{
foreach (var a in addList)
{
cont.VehicleInfos.Add(a);
}
cont.SaveChanges();
}
if ((updateList != null) && (updateList.Count > 0))
using (var cont = this.UnitOfWork.Context)
{
foreach (var a in updateList)
{
var aa = cont.VehicleInfos?.Where(x => x.VehicleInfoId == a.VehicleInfoId)?.FirstOrDefault();
aa.Address_City = a.Address_City;
aa.Address_Country = a.Address_Country;
aa.Address_StateCode = a.Address_StateCode;
aa.Address_Street1 = a.Address_Street1;
aa.Address_Street2 = a.Address_Street2;
aa.Address_Zip = a.Address_Zip;
aa.ChassisYear = a.ChassisYear;
aa.EngineFamilyName = a.EngineFamilyName;
aa.Entity = a.Entity;
aa.EntityId = a.EntityId;
aa.InputFieldEntity = a.InputFieldEntity;
aa.InputFieldEntityId = a.InputFieldEntityId;
aa.InputFieldGroup = a.InputFieldGroup;
aa.LicensePlate = a.LicensePlate;
aa.Manufacturer = a.Manufacturer;
aa.ModelYear = a.ModelYear;
aa.PurchasedDate = a.PurchasedDate;
aa.RegHoldClearBy = a.RegHoldClearBy;
aa.RegHoldClearDate = a.RegHoldClearDate;
aa.RegHoldComment = a.RegHoldComment;
aa.RegHoldSet = a.RegHoldSet;
aa.RegHoldSetBy = a.RegHoldSetBy;
aa.RegHoldSetDate = a.RegHoldSetDate;
aa.TrailerPlate = a.TrailerPlate;
aa.UpdatedBy = a.UpdatedBy;
aa.UpdatedDate = a.UpdatedDate;
aa.VehicleId = a.VehicleId;
aa.VehicleOperator = a.VehicleOperator;
aa.VehicleOwner = a.VehicleOwner;
aa.VIN = a.VIN;
}
cont.SaveChanges();
}
}
catch (Exception ex)
{
ARB.Logging.LogHandler.LogError(9001, "CreateOrUpdate(string Entity, int EntityId) in class VehicleInfoManager", ex);
throw ex;
}
}
What I want is, I want two things here
the Post method to call or start the CreateOrUpdate method as background call but instead of waiting until the CreateOrUpdate method finishes, it should return the result data to UI and continue the big task CreateOrUpdate in the background.
Is there anyway to start the background method CreateOrUpdate after sometime like (10 mins etc) post method returns to UI, if it can't be done, its OK we don't have to worry but just asking if there is anyway to trigger this from within the same application
When I implemented it in the above way, even after using System.Web.Hosting.HostingEnvironment.QueueBackgroundWorkItem I am still getting the null http context at the following location
user = System.Web.HttpContext.Current.User.Identity.Name; url = System.Web.HttpContext.Current.Request.Url.ToString();
System.Web.HttpContext.Current is coming out as null.
and the application is breaking,
I chaned my async call to the following to use HostingEnvironment.QueueBackgroundWorkItem, still the same htt current context is coming out as null, any help please
System.Web.Hosting.HostingEnvironment.QueueBackgroundWorkItem(async cancellationToken =>
{
await this.DomainLogicUnitOfWork.VehicleInfoManager.CreateOrUpdate(Entity, EntityId);
});
Thank you
System.Web.HttpContext.Current is maintained by the ASP.NET's SynchronizationContext.
When you start a new Task, the code will be executing on another thread pool thread without a SynchronizationContext and the value of System.Web.HttpContext.Current is not safe to use, whatever its value.
When the execution of the action method (Post) ends, the request will end and the HttpContext instance will be invalid, even if you mange to get a reference to it.
Also, there is no guarantee that that code you posted to the thread pool will run to complete, since it's out of ASP.NET control and ASP.NET won't be aware of it if IIS decides, for some reason, to recycle the application pool or the web application.
If you to post background work, use HostingEnvironment.QueueBackgroundWorkItem. Beware if its constraints.

How to get parallel access to the method for multiple users

My telegram bot is necessary so that the user can answer questions in order and save these answers in the same order for a specific user in parallel.
static readonly ConcurrentDictionary<int, string[]> Answers = new ConcurrentDictionary<int, string[]>();
static void Main(string[] args)
{
try
{
Task t1 = CreateHostBuilder(args).Build().RunAsync();
Task t2 = BotOnMessage();
await Task.WhenAll(t1, t2);
}
catch (Exception ex)
{
Console.WriteLine("Error" + ex);
}
}
here is my BotOnMessage() method to receive and process messages from users
async static Task BotOnMessage()
{
int offset = 0;
int timeout = 0;
try
{
await bot.SetWebhookAsync("");
while (true)
{
var updates = await bot.GetUpdatesAsync(offset, timeout);
foreach (var update in updates)
{
var message = update.Message;
if (message.Text == "/start")
{
Registration(message.Chat.Id.ToString(), message.Chat.FirstName.ToString(), createdDateNoTime.ToString("yyyy-MM-dd HH:mm:ss.fff", CultureInfo.InvariantCulture));
var replyKeyboard = new ReplyKeyboardMarkup
{
Keyboard = new[]
{
new[]
{
new KeyboardButton("eng"),
new KeyboardButton("ger")
},
}
};
replyKeyboard.OneTimeKeyboard = true;
await bot.SendTextMessageAsync(message.Chat.Id, "choose language", replyMarkup: replyKeyboard);
}
switch (message.Text)
{
case "eng":
var replyKeyboardEN = new ReplyKeyboardMarkup
{
Keyboard = new[]
{
new[]
{
new KeyboardButton("choice1"),
new KeyboardButton("choice2")
},
}
};
replyKeyboardEN.OneTimeKeyboard = true;
await bot.SendTextMessageAsync(message.Chat.Id, "Enter choice", replyMarkup: replyKeyboardEN);
await AnonymEN();
break;
case "ger":
var replyKeyboardGR = new ReplyKeyboardMarkup
{
Keyboard = new[]
{
new[]
{
new KeyboardButton("choice1.1"),
new KeyboardButton("choice2.2")
},
}
};
replyKeyboardGR.OneTimeKeyboard = true;
await bot.SendTextMessageAsync(message.Chat.Id, "Enter choice", replyMarkup: replyKeyboardGR);
await AnonymGR();
break;
}
offset = update.Id + 1;
}
}
}
catch (Exception ex)
{
Console.WriteLine("Error" + ex);
}
}
and AnonymEN() method for eng case in switch. The problem appears here when I call this method from switch case in BotOnMessage(). Until switch (message.Text) multiple users can asynchronously send messages and get response. When first user enters AnonymEN() second user can't get response from this method until first user will finish it till the end. Also I call BotOnMessage() in the end of AnonymEN() to get back for initial point with possibility to start bot again. For the ordered structure of questions and answers I used ConcurrentDictionary way from here Save user messages sent to bot and send finished form to other user. Any suggestion and solution how to edit code to make this bot available for multiple users at one time?
async static Task AnonymEN()
{
int offset = 0;
int timeout = 0;
try
{
await bot.SetWebhookAsync("");
while (true)
{
var updates = await bot.GetUpdatesAsync(offset, timeout);
foreach (var update in updates)
{
var message = update.Message;
int userId = (int)message.From.Id;
if (message.Type == MessageType.Text)
{
if (Answers.TryGetValue(userId, out string[] answers))
{
var title = message.Text;
if (answers[0] == null)
{
answers[0] = message.Text;
await bot.SendTextMessageAsync(message.Chat, "Enter age");
}
else
{
SaveMessage(message.Chat.Id.ToString(), "anonym", "anonym", "anonym", answers[0].ToString(), title.ToString(), createdDateNoTime.ToString("yyyy-MM-dd HH:mm:ss.fff", CultureInfo.InvariantCulture));
Answers.TryRemove(userId, out string[] _);
await bot.SendTextMessageAsync(message.Chat.Id, "ty for request click /start");
await BotOnMessage();
}
}
else if (message.Text == "choice1")
{
Answers.TryAdd(userId, new string[1]);
await bot.SendTextMessageAsync(message.Chat.Id, "Enter name");
}
}
offset = update.Id + 1;
}
}
}
catch (Exception ex)
{
Console.WriteLine("Error" + ex);
}
}
I can see multiple issues with your code:
It is hard to read. While this is a personal preference I strongly advise to write short concise methods that have 1 responsibility. This will make it easier to understand and maintain your code. https://en.wikipedia.org/wiki/Single-responsibility_principle
Everything is static. This makes it very hard to keep track of any state such as language that should be tracked per user.
Using infinite loops and recursion with no escape. I highly doubt this was intended but you could get an infinite chain of calls like this BotOnMessage -> AnonymEN -> BotOnMessage -> AnonymEN. I think you want to exit the AnonymEN function using either a return, break or while(someVar) approach instead of calling the BotOnMessage function.
If two users are sending messages you get mixed responses. Example message flow user1: /start, user1: eng, user2: hello. The bot will now give an english response to user2. I'm sure this is not intended
The code below is a minimal example that addresses the issues I mentioned. It is not perfect code but should help you get started.
private Dictionaty<string, UserSession> userSessions = new ();
async Task BotOnMessage()
{
try
{
while(true)
{
var message = await GetMessage(timeout);
var userSession = GetUserSession(message.user);
userSession.ProcessMessage(message);
}
}
catch(){}
}
async void GetUserSession(string user)
{
if(!userSessions.HasKey(user))
{
userSessions[user](new Session());
}
return userSessions[user];
}
public class UserSession
{
public async Task ProcessMessage(message)
{
// Existing message processing code goes here.
// Do not use a loop or recursion.
// Instead track the state (e.g. languge) using fields.
}
}

Is it safe to call timer callback method like this?

Please correct me if I have some errors in this logic (not some elegancy things like getting rid of constructor initialization and using Init method instead for Poll). I have not had experience with timer callbacks so far. The code is pretty self-explanatory, I hope. What confuses me a bit is some mix of async things (like connection client creation) and further code - though, I just reused IClient class, it's not mine):
public async Task<WaitForLoanActivationDto> WaitForLoanActivation(string userName, string accountGuid, int timeout)
{
const int dueTime = 0;
const int pollPeriod = 500;
Poll<WaitForLoanActivationDto> state = new Poll<WaitForLoanActivationDto>
{
Client = await _rpcConnectionPool.GetClientAsync(userName),
AutoResetEvent = new AutoResetEvent(false),
StartTime = DateTime.Now,
Timeout = timeout,
Parameters = new Variant[] { accountGuid },
Result = new WaitForLoanActivationDto { Active = false }
};
Timer timer = new Timer(new TimerCallback(WaitForLoanActivationCallback), state, dueTime, pollPeriod);
state.AutoResetEvent.WaitOne();
timer.Dispose(state.AutoResetEvent);
if (state.ThreadException != null)
{
throw state.ThreadException;
}
return state.Result;
}
private void WaitForLoanActivationCallback(object state)
{
Poll<WaitForLoanActivationDto> pollState = (Poll<WaitForLoanActivationDto>)state;
if (pollState.StartTime.AddMilliseconds(pollState.Timeout) >= DateTime.Now)
{
try
{
using (RPCReply reply = ResultHelper.Check(pollState.Client.ExecuteRemoteCommand(WaitForLoanActivationRpcName, pollState.Parameters)))
{
pollState.Result.Active = reply[2].IDList["Active"].AsBoolean().Value;
VariantList statusList = reply[2].IDList["statuses"].List;
if (statusList.Count > 0)
{
var statuses = CustomerInformationConverter.GetStatusesList(statusList);
pollState.Result.Statuses = statuses.ToArray();
}
if (pollState.Result.Active)
{
pollState.AutoResetEvent.Set();
}
}
}
catch (Exception ex)
{
pollState.Result = null;
pollState.ThreadException = ex;
pollState.AutoResetEvent.Set();
}
}
else
{
pollState.AutoResetEvent.Set();
}
}
Thank you guys.
#ckuri, based on your idea I came up with the code below. I have not used Task.Delay though, because as I understood it creates delay even if the task is successfully complete - after it. The objective of my case is to run RPC method each pollPeriod milliseconds during timeout milliseconds. It the method returns Active == false - keep polling, otherwise - return the result. RPC execution time might take more than pollPeriod milliseconds, so if it's already running - no sense to spawn another task.
public async Task<WaitForLoanActivationDto> WaitForLoanActivation(string userName, string accountGuid, int timeout)
{
var cancellationTokenSource = new CancellationTokenSource();
try
{
const int pollPeriod = 500;
IClient client = await _rpcConnectionPool.GetClientAsync(userName);
DateTime startTime = DateTime.Now;
WaitForLoanActivationDto waitForLoanActivationDto = null;
while (startTime.AddMilliseconds(timeout) >= DateTime.Now)
{
waitForLoanActivationDto = await Task.Run(() => WaitForLoanActivationCallback(client, accountGuid), cancellationTokenSource.Token);
if (waitForLoanActivationDto.Active)
{
break;
}
else
{
await Task.Delay(pollPeriod, cancellationTokenSource.Token);
}
}
return waitForLoanActivationDto;
}
catch (AggregateException ex)
{
cancellationTokenSource.Cancel();
throw ex.InnerException;
}
}
private WaitForLoanActivationDto WaitForLoanActivationCallback(IClient client, string accountGuid)
{
using (RPCReply reply = ResultHelper.Check(client.ExecuteRemoteCommand(WaitForLoanActivationRpcName, accountGuid)))
{
var waitForLoanActivationDto = new WaitForLoanActivationDto
{
Active = reply[2].IDList["Active"].AsBoolean().Value
};
VariantList statusList = reply[2].IDList["statuses"].List;
if (statusList.Count > 0)
{
var statuses = CustomerInformationConverter.GetStatusesList(statusList);
waitForLoanActivationDto.Statuses = statuses.ToArray();
}
return waitForLoanActivationDto;
}
}

Database async query and processing

Let me rephrase.
I have a method that generates strings(paths) after given a start string(path)
IF those paths are for a directory I want to enqueue that in the input of the method.
After processing the path synchronously, I want to get the Data and clone it async into multiple paths of a pipeline, were each path needs to get the datablock. So the Broadcastblock is out of the question (it cant send a blocking signal to the blocks before itself),
The joinblock, joining the results is relatively straight forward.
So to sum up
Is there a Block in Dataflow block, where i can access the inputqueue from the delegate, if when, how?
Is there a construct that acts like the broadcastblock but can block the blocks that came before it?
I tried doing it via almighty google:
class subversion
{
private static string repo;
private static string user;
private static string pw;
private static DateTime start;
private static DateTime end;
private static List<parserObject> output;
public static List<parserObject> svnOutputList
{
get {return output; }
}
private static List<string> extension_whitelist;
public async void run(string link, string i_user, string i_pw, DateTime i_start, DateTime i_end)
{
repo = link;
user = i_user;
pw = i_pw;
start = i_start;
end = i_end;
output = new List<parserObject>();
BufferBlock<string> crawler_que = new BufferBlock<string>();
BufferBlock<svnFile> parser_que = new BufferBlock<svnFile>();
var svn = crawl(crawler_que, parser_que);
var broadcaster = new ActionBlock<svnFile>(async file =>
{//tried to addapt the code from this ensure always send broadcastblock -> see link below
List<Task> todo = new List<Task>();
todo.Add(mLoc);//error cannot convert methodgroup to task
foreach (var task in todo)//error: Only assignment, call, increment, decrement, await, and new object expressions can be used as a statement?
{
task.SendAsync(file);//error cannot convert task to targetblock
}
await Task.WhenAll(todo.ToArray());
});
parser_que.LinkTo(broadcaster);
await Task.WhenAll(broadcaster, svn);//error cannot convert actionblock to task
}
private static async Task crawl(BufferBlock<string> in_queue, BufferBlock<svnFile> out_queue)
{
SvnClient client = new SvnClient();
client.Authentication.ForceCredentials(user, pw);
SvnListArgs arg = new SvnListArgs
{
Depth = SvnDepth.Children,
RetrieveEntries = SvnDirEntryItems.AllFieldsV15
};
while (await in_queue.OutputAvailableAsync())
{
string buffer_author = null;
string prev_author = null;
System.Collections.ObjectModel.Collection<SvnListEventArgs> contents;
string link = await in_queue.ReceiveAsync();
if (client.GetList(new Uri(link), arg, out contents))
{
foreach (SvnListEventArgs item in contents)
{
if (item.Entry.NodeKind == SvnNodeKind.Directory)
{
in_queue.Post(item.Path);
}
else if (item.Entry.NodeKind == SvnNodeKind.File)
{
try
{
int length = item.Name.LastIndexOf(".");
if (length <= 0)
{
continue;
}
string ext = item.Name.Substring(length);
if (extension_whitelist.Contains(ext))
{
Uri target = new Uri((repo + link));
SvnRevisionRange range;
SvnBlameArgs args = new SvnBlameArgs
{
Start = start.AddDays(-1),
End = end
};
try
{
svnFile file_instance = new svnFile();
client.Blame(target, args, delegate(object sender3, SvnBlameEventArgs e)
{
if (e.Author != null)
{
buffer_author = e.Author;
prev_author = e.Author;
}
else
{
buffer_author = prev_author;
}
file_instance.lines.Add(new svnLine(buffer_author, e.Line));
});
out_queue.Post(file_instance);
}
catch (Exception a) { Console.WriteLine("exception:" + a.Message);}
}
}
catch (Exception a)
{
}
}
}
}
}
}
private static async Task mLoc(svnFile file)
{
List<parserPart> parts = new List<parserPart>();
int find;
foreach (svnLine line in file.lines)
{
if ((find = parts.FindIndex(x => x.uploader_id == line.author)) > 0)
{
parts[find].count += 1;
}
else
{
parts.Add(new parserPart(line.author));
}
find = 0;
}
parserObject ret = new parserObject(parts, "mLoc");
await output.Add(ret);
return;
}
}
broadcastblock answer: Alternate to Dataflow BroadcastBlock with guaranteed delivery

How to create async http requests in a loop?

Yesterday I've found out how to create several async http requests without async/await. But today I need to do it in a loop: if some of responses don't satisfy some condition - I need to change a request for them and send these requests again. It may be repeated several times.
I've tried this code:
do
{
var loadingCoordinatesTasks = new List<Task<Terminal>>();
var totalCountOfTerminals = terminalPresetNode.ChildNodes.Count;
var uiTaskScheduler = TaskScheduler.FromCurrentSynchronizationContext();
foreach (var terminal in terminals.Except(_terminalsWithCoordinates))
{
var address = terminal.GetNextAddress();
var webRequest = (HttpWebRequest)WebRequest.Create(GeoCoder.GeoCodeUrl + address);
var webRequestTask = Task.Factory.FromAsync<WebResponse>(webRequest.BeginGetResponse,
webRequest.EndGetResponse,
terminal);
var parsingTask = webRequestTask.ContinueWith(antecedent =>
{
// Parse the response
});
loadingCoordinatesTasks.Add(parsingTask);
}
Task.Factory.ContinueWhenAll(loadingCoordinatesTasks.ToArray(), antecedents =>
{
foreach (var antecedent in antecedents)
{
var terminalWithCoordinates = antecedent.Result;
if (antecedent.Status == TaskStatus.RanToCompletion &&
!terminalWithCoordinates.Coordinates.AreUnknown)
{
_terminalsWithCoordinates.Add(terminalWithCoordinates);
_countOfProcessedTerminals++;
}
}
});
} while (_countOfProcessedTerminals < totalCountOfTerminals);
but is it possible to check the condition in while just after every single set of requests executed?
You can perform the check after increasing the count:
_countOfProcessedTerminals++;
if (_countOfProcessedTerminals >= totalCountOfTerminals)
{
break;
}
Is _countOfProcessedTerminals thread-safe though?
I manage to do it using recursion:
public void RunAgainFailedTasks(IEnumerable<Task<Terminal>> tasks)
{
Task.Factory.ContinueWhenAll(tasks.ToArray(), antecedents =>
{
var failedTasks = new List<Task<Terminal>>();
foreach (var antecedent in antecedents)
{
var terminal = antecedent.Result;
// Previous request was failed
if (terminal.Coordinates.AreUnknown)
{
string address;
try
{
address = terminal.GetNextAddress();
}
catch (FormatException) // No versions more
{
continue;
}
var getCoordinatesTask = CreateGetCoordinatesTask(terminal, address);
failedTasks.Add(getCoordinatesTask);
}
else
{
_terminalsWithCoordinates.Add(terminal);
}
}
if (failedTasks.Any())
{
RunAgainFailedTasks(failedTasks);
}
else
{
// Display a map
}
}, CancellationToken.None,
TaskContinuationOptions.None,
TaskScheduler.FromCurrentSynchronizationContext());
}
private Task<Terminal> CreateGetCoordinatesTask(Terminal terminal, string address)
{
var webRequest = (HttpWebRequest)WebRequest.Create(GeoCoder.GeoCodeUrl + address);
webRequest.KeepAlive = false;
webRequest.ProtocolVersion = HttpVersion.Version10;
var webRequestTask = Task.Factory.FromAsync<WebResponse>(webRequest.BeginGetResponse,
webRequest.EndGetResponse,
terminal);
var parsingTask = webRequestTask.ContinueWith(webReqTask =>
{
// Parse the response
});
return parsingTask;
}

Categories

Resources