I have a web application where I parse a CSV file that can have over 200k records in it. I parse each line for information, verify that the key does not exist in the database, and then add it to the context. When the count reaches 10,000 records, it calls the SaveChanges routine. The problem is that there can be duplicates in the context, and it errors out. This is running on an Azure VM communicating with an Azure SQL server.
Two questions: how do I handle the duplicate issue, and is there any way I can improve the speed, as it takes several hours to run?
// Imports loan rows from the CSV parser into dfc_LoanRecords.
// A LoanId is skipped when it is already stored in the table or was already queued for
// insert earlier in this same file; the second case is what made SaveChanges fail before,
// because the old per-row database check could not see rows still waiting in the context.
using (LoanFileEntities db = new LoanFileEntities())
{
    // Entities are only ever added here, so automatic change detection and validation are switched off.
    db.Configuration.AutoDetectChangesEnabled = false;
    db.Configuration.ValidateOnSaveEnabled = false;

    // Load the existing keys once instead of issuing one existence query per CSV line.
    // NOTE(review): the comparison is case-insensitive on the assumption that the database
    // collation is too - confirm. The set is a snapshot; rows inserted concurrently by
    // another process are not seen.
    var knownLoanIds = new System.Collections.Generic.HashSet<string>(
        db.dfc_LoanRecords.Select(x => x.loan_code_string),
        StringComparer.OrdinalIgnoreCase);

    const int batchSize = 10000;
    int pendingInserts = 0;

    while (parser.Read())
    {
        counter++;
        int loan_code = 0;
        string loan_code_string = parser["LoanId"];

        DateTime date_pulled;
        if (!DateTime.TryParse(parser["PullDate"].Trim(), CultureInfo.InvariantCulture, DateTimeStyles.None, out date_pulled))
        {
            throw new Exception("No Pull Date for line " + counter);
        }

        DateTime date_originated;
        if (!DateTime.TryParse(parser["OriginationDate"].Trim(), CultureInfo.InvariantCulture, DateTimeStyles.None, out date_originated))
        {
            throw new Exception("No Origination Date for line " + counter);
        }

        DateTime date_due;
        if (!DateTime.TryParse(parser["DueDate"].Trim(), CultureInfo.InvariantCulture, DateTimeStyles.None, out date_due))
        {
            throw new Exception("No Due Date for line " + counter);
        }

        string region = parser["Region"].Trim();
        string source = parser["Channel"].Trim();
        string password = parser["FilePass"].Trim();
        // NOTE(review): amounts are parsed with the current culture while dates use the invariant culture - confirm this is intended.
        decimal principalAmt = Convert.ToDecimal(parser["Principal"].Trim());
        decimal totalDue = Convert.ToDecimal(parser["TotalDue"].Trim());
        string vitaLoanId = parser["VitaLoanId"];

        if (knownLoanIds.Contains(loan_code_string))
        {
            continue; // already in the table or already queued from this file
        }

        dfc_LoanRecords loan = new dfc_LoanRecords();
        loan.loan_code = loan_code;
        loan.loan_code_string = loan_code_string;
        loan.loan_principal_amt = principalAmt;
        loan.loan_due_date = date_due;
        loan.date_pulled = date_pulled;
        loan.date_originated = date_originated;
        loan.region = region;
        loan.source = source;
        loan.password = password;
        loan.loan_amt_due = totalDue;
        loan.vitaLoanId = vitaLoanId;
        loan.load_file = fileName;
        loan.load_date = DateTime.Now;

        bool queueForInsert = false;
        switch (loan.region)
        {
            case "UK":
                if (location.Equals("UK"))
                {
                    // UK rows are counted but not stored: the insert is disabled below.
                    //db.dfc_LoanRecords.Add(loan);
                    if (loan.source == "Online")
                    {
                        counter_new_uk_online++;
                    }
                    else
                    {
                        counter_new_uk_retail++;
                    }
                }
                break;
            case "US":
                if (location.Equals("US"))
                {
                    queueForInsert = true;
                    if (loan.source == "Online")
                    {
                        counter_new_us_online++;
                    }
                    else
                    {
                        counter_new_us_retail++;
                    }
                }
                break;
            case "Canada":
                // NOTE(review): Canada rows are accepted when location is "US" - confirm this is intended.
                if (location.Equals("US"))
                {
                    queueForInsert = true;
                    if (loan.source == "Online")
                    {
                        counter_new_cn_online++;
                    }
                    else
                    {
                        counter_new_cn_retail++;
                    }
                }
                break;
        }

        if (queueForInsert)
        {
            db.dfc_LoanRecords.Add(loan);
            // Remember the key only once the row is really queued, so a later row with the
            // same LoanId cannot be queued a second time.
            knownLoanIds.Add(loan_code_string);

            // Flush in batches, counted by rows actually queued rather than by CSV line number.
            pendingInserts++;
            if (pendingInserts >= batchSize)
            {
                db.SaveChanges();
                pendingInserts = 0;
            }
        }
    } // end of parser read

    db.SaveChanges(); // flush the final partial batch
}
}
}
I would suggest removing duplicates in the code before sending it over to .SaveChanges().
Instead of going into detail about duplicate removal, I've put together this list of links to existing questions and answers on StackOverflow that may help:
Delete duplicates using Lambda
Using DISTINCT on a subquery to remove duplicates in Entity Framework
Using LINQ to find / delete duplicates
Hope that helps!
Related
I'm fetching company names and other data from QB file with the following code using QB-SDK:
/// <summary>
/// Builds a single customer query restricted to a name range, sends it through the session
/// manager and returns the parsed response.
/// </summary>
/// <param name="fromName">Lower bound written to the name-range filter.</param>
/// <param name="toName">Upper bound written to the name-range filter.</param>
/// <param name="IsActiveOnly">When true the filter asks for active customers only; otherwise for all.</param>
/// <returns>Whatever <c>WalkCustomerQuery</c> produces for the response set.</returns>
public IList<CustomerModelQB> GetAllCustomer(string fromName = "a", string toName = "z", bool IsActiveOnly = true)
{
    RequestMsgSet.ClearRequests();
    ICustomerQuery customerQuery = RequestMsgSet.AppendCustomerQueryRq();

    // Active-status filter.
    if (!IsActiveOnly)
    {
        customerQuery.ORCustomerListQuery.CustomerListFilter.ActiveStatus.SetValue(ENActiveStatus.asAll);
    }
    else if (customerQuery != null)
    {
        customerQuery.ORCustomerListQuery.CustomerListFilter.ActiveStatus.SetValue(ENActiveStatus.asActiveOnly);
    }

    // Name-range filter: FromName first, then ToName.
    var nameRange = customerQuery.ORCustomerListQuery.CustomerListFilter.ORNameFilter.NameRangeFilter;
    nameRange.FromName.SetValue(fromName);
    nameRange.ToName.SetValue(toName);

    // Only these two elements are requested in the response.
    customerQuery.IncludeRetElementList.Add("FullName");
    customerQuery.IncludeRetElementList.Add("AccountNumber");

    ResponseMsgSet = SessionManager.DoRequests(RequestMsgSet);
    return WalkCustomerQuery(ResponseMsgSet);
}
I looked at the iterator and tried some code.
It seems to fetch only an initial batch of data (for example, the first one thousand records) and nothing more. The logic I expect is: fetch the first few records, then the next few, and so on until all records have been fetched. Unfortunately, the QB SDK does not seem to offer this; it only lets me fetch the first few records and that's all.
What I actually want to do is:
I have a few hundred thousand records in my QB company file, and I would like to fetch the first batch (say ten thousand records), then move to the next ten thousand, then the next ten thousand, and so on until all the records are fetched.
I am able to do this with ORNameFilter, TotalBalanceFilter and some other filters, but I want to page through the data instead: the first 10 thousand records, then the next 10 thousand, and so on through every record in my company file.
This is actually continuation of this SO question.
Is there any way around to do this?
Here is our code to get invoices from a date range. It uses QBFC.
/// <summary>
/// Queries invoices page by page with the query iterator and collects them into one list.
/// </summary>
/// <param name="fromDate">Optional lower bound of the date filter (ignored when <paramref name="invoiceNumber"/> is given).</param>
/// <param name="toDate">Optional upper bound of the date filter (ignored when <paramref name="invoiceNumber"/> is given).</param>
/// <param name="fromModifiedDate">True to filter on the modified date, false to filter on the transaction date.</param>
/// <param name="invoiceNumber">When non-empty, invoices are filtered by a reference number starting with this value instead of by date.</param>
/// <returns>All invoices converted by <c>GetInvoiceHeaderDetail</c>, in the order they were returned.</returns>
public List<tbInvoiceHeader> GetInvoices(DateTime? fromDate, DateTime? toDate, bool fromModifiedDate, string invoiceNumber)
{
    var invoices = new List<tbInvoiceHeader>();

    IMsgSetRequest request = GetLatestMsgSetRequest();
    request.Attributes.OnError = ENRqOnError.roeContinue;

    IInvoiceQuery invoiceQuery = request.AppendInvoiceQueryRq();
    IInvoiceFilter filter = invoiceQuery.ORInvoiceQuery.InvoiceFilter;

    if (string.IsNullOrEmpty(invoiceNumber))
    {
        // No invoice number: filter by whichever date bounds were supplied.
        if (fromDate.HasValue)
        {
            if (fromModifiedDate)
            {
                filter.ORDateRangeFilter.ModifiedDateRangeFilter.FromModifiedDate.SetValue(fromDate.Value, asDateOnly: true);
            }
            else
            {
                filter.ORDateRangeFilter.TxnDateRangeFilter.ORTxnDateRangeFilter.TxnDateFilter.FromTxnDate.SetValue(fromDate.Value);
            }
        }

        if (toDate.HasValue)
        {
            if (fromModifiedDate)
            {
                filter.ORDateRangeFilter.ModifiedDateRangeFilter.ToModifiedDate.SetValue(toDate.Value, asDateOnly: true);
            }
            else
            {
                filter.ORDateRangeFilter.TxnDateRangeFilter.ORTxnDateRangeFilter.TxnDateFilter.ToTxnDate.SetValue(toDate.Value);
            }
        }
    }
    else
    {
        // Invoice number given: match reference numbers that start with it.
        filter.ORRefNumberFilter.RefNumberFilter.RefNumber.SetValue(invoiceNumber);
        filter.ORRefNumberFilter.RefNumberFilter.MatchCriterion.SetValue(ENMatchCriterion.mcStartsWith);
    }

    filter.MaxReturned.SetValue(iterationNumber); // page size for each iterator step
    invoiceQuery.iterator.SetValue(ENiterator.itStart);
    invoiceQuery.IncludeLinkedTxns.SetValue(true);
    invoiceQuery.IncludeLineItems.SetValue(true);
    invoiceQuery.OwnerIDList.Add("0"); // include custom fields

    IMsgSetResponse reply = mySessionManager.DoRequests(request);
    while (true)
    {
        // The request set holds a single query, so only the first response is read.
        IResponse response = reply.ResponseList.GetAt(0);

        if (response.StatusCode == 0)
        {
            IInvoiceRetList page = response.Detail as IInvoiceRetList;
            int pageSize = page.Count;

            if (invoiceProgressEvent != null)
            {
                invoiceProgressEvent(new ProgressEvent() { Count = pageSize, RemainingCnt = response.iteratorRemainingCount, Invoices = invoices });
            }

            for (int i = 0; i < pageSize; i++)
            {
                invoices.Add(GetInvoiceHeaderDetail(page.GetAt(i)));
            }
        }

        if (response.iteratorRemainingCount <= 0)
        {
            // Nothing left. No itStop request is sent here: doing so was observed to fail
            // with 'The iteratorID "..." is not valid.'
            break;
        }

        // Ask for the next page using the iterator id returned with this one.
        invoiceQuery.iteratorID.SetValue(response.iteratorID);
        invoiceQuery.iterator.SetValue(ENiterator.itContinue);
        reply = mySessionManager.DoRequests(request);
    }

    return invoices;
}
Is there any way I can run a function in parallel with batches of a large dataset on multiple threads in c#?
So I have a list of data with approximate size of 32000 lines. I run the function below which reads each line of the dataset and verifies it. The idea is to separate the dataset into chunks of 5000 and concurrently apply the function below to each chunk/batch.
/// <summary>
/// Flags each invoice whose account number is unknown for the vendor or belongs to an
/// inactive account, and saves the flag and comment on the stored invoice.
/// </summary>
/// <param name="invoices">Invoices to verify; each is looked up again by <c>Id</c> in the context.</param>
/// <param name="VendorID">Vendor whose accounts the invoice account numbers are checked against.</param>
/// <remarks>
/// Failures are logged through <see cref="System.Diagnostics.Trace"/> and not rethrown, so the
/// method stays best-effort for its callers. Invoices saved before a failure remain saved.
/// </remarks>
private void AccountNumberCheck(List<Invoice> invoices, string VendorID)
{
    if (invoices == null || invoices.Count == 0)
    {
        return;
    }

    try
    {
        using (var context = new ApplicationContext())
        {
            foreach (var invoice in invoices)
            {
                var invoiceDB = context.Invoices.Find(invoice.Id);
                if (invoiceDB == null)
                {
                    // The invoice is not stored; skip it instead of aborting the rest of the batch.
                    continue;
                }

                // One query per invoice: fetch only the status column of the matching accounts.
                var statuses = context.Accounts
                    .Where(m => m.Account_Number == invoice.Account_Number && m.VendorID == VendorID)
                    .Select(m => m.Active_Status)
                    .ToList();

                if (statuses.Count == 0)
                {
                    invoiceDB.ExceptionFlag = true;
                    invoiceDB.ExceptionComments = invoiceDB.ExceptionComments + "The Account Number does not exist. ";
                }
                else if (statuses.Any(s => s == false))
                {
                    // At least one matching account is inactive.
                    invoiceDB.ExceptionFlag = true;
                    invoiceDB.ExceptionComments = invoiceDB.ExceptionComments + "The Account Number is Inactive.";
                }
                else
                {
                    invoiceDB.ExceptionFlag = false;
                }

                context.Entry(invoiceDB).State = EntityState.Modified;
                context.SaveChanges();
            }
        }
    }
    catch (Exception ex)
    {
        // Previously swallowed silently; keep the no-throw contract but leave a trace.
        System.Diagnostics.Trace.TraceError("AccountNumberCheck failed for vendor {0}: {1}", VendorID, ex);
    }
}
I'm building a booking system and I want to put controls on the booking step using if/else cases. If the number of bookings is already 4, it should throw an exception and stop inserting into the database.
/// <summary>
/// Stores a booking for the tee time identified by <c>booking.RefCode</c>, unless that slot is
/// already full.
/// </summary>
/// <param name="booking">Booking to insert; its <c>PlayDate</c> and <c>BookingStatus</c> are set here.</param>
/// <returns>
/// On success, the result of <c>GetOneteeTime</c> for the slot. When the slot is full, a result with
/// <c>Successfull = 0</c> and <c>Error = "Booking slot is full"</c>; nothing is inserted.
/// When an exception occurs, a result with <c>Successfull = 0</c> and <c>Error = "Error from server"</c>.
/// </returns>
public ApiResult<TimeModelExtended> SaveBooking(Booking booking)
{
    const int MaxBookingsPerSlot = 4;

    AchimotaGCDb repo = new AchimotaGCDb();
    var result = new ApiResult<TimeModelExtended>();
    try
    {
        booking.PlayDate = getPlayDate(booking.RefCode);
        Int16 nb = getNbBooked(booking.RefCode);

        if (nb >= MaxBookingsPerSlot)
        {
            // Slot is full: report it and stop here. Previously execution fell through to the
            // insert and the error result was overwritten. The finally block still disposes repo.
            result.Successfull = 0;
            result.InternalError = "Booking rejected: slot already has " + nb + " bookings.";
            result.Error = "Booking slot is full";
            return result;
        }

        if (nb == 0)
        {
            booking.BookingStatus = 1;//Booked already
        }
        else
        {
            booking.BookingStatus = 0;//Reservation already
        }

        repo.Insert(booking);
        // NOTE(review): this flag is set on an object that the next line replaces; confirm
        // that GetOneteeTime reports success itself.
        result.Successfull = 1;
        result = GetOneteeTime(booking.RefCode);
    }
    catch (Exception ex)
    {
        result.Successfull = 0;
        result.InternalError = ex.Message;
        result.Error = "Error from server";
    }
    finally
    {
        repo.Dispose();
    }
    return result;
}
help to solve that.
If you want to throw an exception, you need to really throw it:
// Stop processing as soon as the slot already holds four bookings. A specific exception
// type is thrown instead of the base Exception; it is still caught by catch (Exception).
if (nb == 4)
{
    throw new InvalidOperationException("Booking slot is full.");
}
But I don't think throwing an exception is a good idea here. Throwing an exception and validating input are different things.
Here is my suggestion:
// Validation failure: return an error result immediately so that nothing below runs
// (in particular, no insert) when the slot already holds four bookings.
if (nb == 4)
{
    return new ApiResult<TimeModelExtended>()
    {
        Successfull = 0,
        InternalError = "Other messages",
        Error = "Booking slot is full."
    };
}
This returns the error result immediately, so nothing after it runs unless nb != 4.
I am writing a small data migration tool that copies data from one big database to another, smaller database. All of the other data migration methods work satisfactorily, but the following method throws an exception once the SKIP value reaches 100. I have run this console script both remotely and on the source server itself, and tried many different ways to find the actual problem. In the end I found that it fails only from a SKIP value of 100 onwards, for any TAKE value (1, 2, 3, 4, 5, ...).
Dear experts, I have no prior experience with this type of problem. Any suggestions or comments that help resolve it are appreciated. Thanks for your time.
I know this code is not clean and the method is too long. I kept adding extra lines of code while trying to solve this, because solving the problem is my main concern. I have simply copied and pasted the last edited version of the method.
In short, the problem can be illustrated with the following two lines:
var working = queryable.Skip(90).Take(10).ToList(); // no exception
var failing = queryable.Skip(100).Take(10).ToList(); // getting exception
/// <summary>
/// Copies importers from the source FormA rows (state 8) into the RMG database, reading the
/// source rows in pages ordered by importer name. An importer whose name already exists in
/// the target (case-insensitively) is updated; otherwise it is added.
/// </summary>
/// <param name="sourceDb">Source context to read FormA rows from.</param>
/// <param name="rmgDb">Target context to write importers to.</param>
/// <remarks>
/// NOTE(review): when a page query fails, both contexts are disposed and replaced with new
/// instances, as before. The caller's instances are therefore disposed and the replacements
/// are never disposed here - confirm that callers do not reuse the contexts afterwards.
/// NOTE(review): the page order is by ImporterName only, so rows with equal names have no
/// guaranteed order between pages; add a unique tie-breaker column if one is available.
/// </remarks>
private static void ImporterDataMigrateToRmgDb(SourceDBEntities sourceDb, RmgDbContext rmgDb)
{
    const int take = 10;

    int count = sourceDb.FormAs.Where(x => x.FormAStateId == 8).GroupBy(x => x.ImporterName).Count();
    Console.WriteLine("Total Possible Importer: " + count);

    // Paging is done over rows, not over groups, so the loop must be bounded by the row
    // count. Bounding it by (group count / take) stopped early and dropped the last partial page.
    int totalRows = sourceDb.FormAs.Count(x => x.FormAStateId == 8);

    for (int skip = 0; skip < totalRows; skip += take)
    {
        IOrderedQueryable<FormA> queryable = sourceDb.FormAs.Where(x => x.FormAStateId == 8).OrderBy(x => x.ImporterName);
        List<IGrouping<string, FormA>> list;
        try
        {
            list = queryable.Skip(skip).Take(take).GroupBy(x => x.ImporterName).ToList();
        }
        catch (Exception exception)
        {
            // Log, start over with fresh contexts and move on to the next page.
            Console.WriteLine(exception.Message);
            sourceDb.Dispose();
            rmgDb.Dispose();
            sourceDb = new SourceDBEntities();
            rmgDb = new RmgDbContext();
            continue;
        }

        foreach (var group in list)
        {
            FormA formA = group.FirstOrDefault();
            if (formA == null)
            {
                continue;
            }

            Importer importer = formA.ConvertToRmgImporterFromFormA();
            Console.WriteLine(formA.FormANo + " " + importer.Name);

            var importers = rmgDb.Importers.Where(x => x.Name.ToLower() == importer.Name.ToLower()).ToList();
            if (importers.Count > 0)
            {
                // Update every existing match. Previously two or more matches fell into the
                // "add" branch and created yet another duplicate.
                foreach (var existing in importers)
                {
                    existing.Country = importer.Country;
                    existing.TotalImportedAmountInUsd = importer.TotalImportedAmountInUsd;
                    rmgDb.Entry(existing).State = EntityState.Modified;
                }
            }
            else
            {
                rmgDb.Importers.Add(importer);
            }

            rmgDb.SaveChanges();
            Console.WriteLine(importer.Name);
        }
    }

    Console.WriteLine("Importer Data Migration Completed");
}
I fixed my problem by changing the query to the following:
// Same query in query-expression form: FormA rows in state 8 are projected to
// ImporterBindingModel (only the listed columns are read from FormA) and ordered by Name.
var queryable =
    from formA in sourceDb.FormAs
    where formA.FormAStateId == 8
    select new Adapters.ImporterBindingModel()
    {
        Id = Guid.NewGuid().ToString(),
        Active = true,
        Created = DateTime.Now,
        CreatedBy = "System",
        Modified = DateTime.Now,
        ModifiedBy = "System",
        Name = formA.ImporterName,
        Address = formA.ImporterAddress,
        City = formA.City,
        ZipCode = formA.ZipCode,
        CountryId = formA.CountryId
    }
    into importer
    orderby importer.Name
    select importer;
I have the following code which creates a Task in Salesforce and then tracks a user's browsing history and stores it in SalesForce. Currently, it displays each and every page the user has browsed as an individual entry. I want to group all those entries together in the Browsing_History__c object instead of task being created every time a user visits a page.
Any help would be appreciated..I am not familiar with SF very much. :)
/// <summary>
/// Logs an activity for a lead/contact in Salesforce unless an equivalent Task was already
/// created in the last two hours. Activities of type "Browsing" are stored as a
/// <c>Web_Browsing__c</c> record; every other type is stored as a completed Task.
/// </summary>
/// <param name="id">Salesforce id used as <c>WhoId</c> (Task) or <c>Lead__c</c> (Web_Browsing__c).</param>
/// <param name="type">Activity type; "Browsing" selects the browsing-history path.</param>
/// <param name="details">Detail text; compared with <c>Details__c</c> of recent Tasks.</param>
/// <param name="description">Optional description; when non-empty it must also match for a Task to count as a duplicate.</param>
/// <remarks>
/// All Salesforce failures are logged through <see cref="System.Diagnostics.Trace"/> and
/// otherwise ignored; the method never throws for them.
/// </remarks>
private void CreateTaskInSF(string id, string type, string details, string description)
{
    // Values are spliced into SOQL text, so quotes and backslashes must be escaped first.
    string safeId = EscapeSoqlLiteral(id);
    string safeType = EscapeSoqlLiteral(type);
    string since = DateTime.UtcNow.AddHours(-2).ToString("s") + "Z";

    // If there's a similar Task in the past 2 hours, don't add it.
    // Description is selected as well: it is compared below and would otherwise come back null.
    // NOTE(review): this query filters on Type__c while new Tasks below only set Subject - confirm the field.
    QueryResult qr;
    try
    {
        qr = Binding.query("Select Details__c, Description from Task WHERE WhoId='" + safeId + "' and Type__c='" + safeType + "' and CreatedDate > " + since);
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning("CreateTaskInSF: recent-task query failed: {0}", ex);
        return;
    }

    if (IsAlreadyLogged(qr, details, description))
    {
        return; // if Activity is already logged, don't log it again
    }

    if (type == "Browsing")
    {
        QueryResult browsingQuery = null;
        try
        {
            browsingQuery = Binding.query("Select Web_Browsing__c from Task WHERE WhoId='" + safeId + "' and Subject='" + safeType + "' and Details__c='" + EscapeSoqlLiteral(details) + "' and CreatedDate > " + since);
        }
        catch (Exception ex)
        {
            // Best effort: a failed lookup is treated like "nothing found" and a new record is created.
            System.Diagnostics.Trace.TraceWarning("CreateTaskInSF: browsing query failed: {0}", ex);
        }

        if (browsingQuery != null && browsingQuery.records != null)
        {
            // TODO: find the correct object and update Browsing_History__c (Binding.update).
            return;
        }

        Web_Browsing__c newBrowsing = new Web_Browsing__c();
        newBrowsing.Lead__c = id;
        newBrowsing.Browsing_History_255__c = details;
        newBrowsing.Type__c = type;
        newBrowsing.Browsing_History__c = details;
        newBrowsing.CreatedDate = DateTime.Now;
        try
        {
            Binding.create(new sObject[] { newBrowsing });
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceWarning("CreateTaskInSF: creating Web_Browsing__c failed: {0}", ex);
        }
        return;
    }

    // Not logged yet and not a browsing entry: create a new completed Task.
    sforce.Task newTask = new sforce.Task();
    newTask.WhoId = id;
    newTask.Subject = type;
    newTask.Details__c = details;
    if (description != "")
    {
        newTask.Description = description;
    }
    newTask.Status = "Completed";
    newTask.Priority = "Normal";
    newTask.ActivityDate = DateTime.Now;
    newTask.ActivityDateSpecified = true;
    try
    {
        Binding.create(new sforce.sObject[] { newTask });
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning("CreateTaskInSF: creating Task failed: {0}", ex);
    }
}

// Returns true when one of the recently created Tasks has the same details and, if a
// description was supplied, the same description.
private static bool IsAlreadyLogged(QueryResult recent, string details, string description)
{
    if (recent == null || recent.records == null)
    {
        return false;
    }

    foreach (sforce.sObject record in recent.records)
    {
        Task candidate = (Task)record;
        if (candidate.Details__c != details)
        {
            continue;
        }

        if (description == "")
        {
            return true; // there's no description, so check only on details field
        }

        if ((candidate.Description ?? "") == description)
        {
            return true;
        }
    }

    return false;
}

// Escapes backslashes and single quotes so the value can be placed inside a quoted SOQL
// string literal; null becomes an empty string, as string concatenation would make it.
private static string EscapeSoqlLiteral(string value)
{
    if (value == null)
    {
        return string.Empty;
    }

    return value.Replace("\\", "\\\\").Replace("'", "\\'");
}
You'll need to update your query to ask for the browsing history object and update the code to create a browsing history object instead of a task.
If you haven't already, review the Web Services API docs, it has examples for querying and creating in java/c#.