Bulk inserting and updating with Entity Framework (Probably a better alternative?) - c#

I have a data set of devices, addresses, and companies that I need to import into our database, with the catch that the database may already contain a specific device/address/company from the new data set. If it does, I need to update that entry with the new information, except for addresses: for those we check whether an exact copy of the address already exists, and otherwise create a new entry.
My problem is that it is very slow to fetch each device/company through EF, update it if it exists, and insert it otherwise. To fix this I tried loading all the companies, devices, and addresses up front, inserting them into dictionaries keyed by their identifiers, and checking whether each incoming record's identifier is already present. This hasn't led to any performance increase. I've included my code below. Normally I would do a batch insert; I'm not sure what the equivalent would be for a batch update, though. Can someone suggest a different route?
var context = ObjectContextHelper.CurrentObjectContext;
var oldDevices = context.Devices;
var companies = context.Companies;
var addresses = context.Addresses;
Dictionary<string, Company> companyMap = new Dictionary<string, Company>(StringComparer.OrdinalIgnoreCase);
Dictionary<string, Device> deviceMap = new Dictionary<string, Device>(StringComparer.OrdinalIgnoreCase);
Dictionary<string, Address> addressMap = new Dictionary<string, Address>(StringComparer.OrdinalIgnoreCase);
foreach (Company c in companies)
{
    if (c.CompanyAccountID != null && !companyMap.ContainsKey(c.CompanyAccountID))
        companyMap.Add(c.CompanyAccountID, c);
}
foreach (Device d in oldDevices)
{
    if (d.SerialNumber != null && !deviceMap.ContainsKey(d.SerialNumber))
        deviceMap.Add(d.SerialNumber, d);
}
foreach (Address a in addresses)
{
    string identifier = GetAddressIdentifier(a);
    if (!addressMap.ContainsKey(identifier))
        addressMap.Add(identifier, a);
}
foreach (DeviceData.TabsDevice device in devices)
{
    Company tempCompany;
    Address tempAddress;
    Device currentDevice;
    if (deviceMap.ContainsKey(device.SerialNumber)) // update an existing device
        deviceMap.TryGetValue(device.SerialNumber, out currentDevice);
    else // insert a new device
        currentDevice = new Device();
    currentDevice.SerialNumber = device.SerialNumber;
    currentDevice.SerialNumberTABS = device.SerialNumberTabs;
    currentDevice.Model = device.Model;
    if (device.CustomerAccountID != null && device.CustomerAccountID != "")
    {
        companyMap.TryGetValue(device.CustomerAccountID, out tempCompany);
        currentDevice.CustomerID = tempCompany.CompanyID;
        currentDevice.CustomerName = tempCompany.CompanyName;
    }
    if (companyMap.TryGetValue(device.ServicingDealerAccountID, out tempCompany))
        currentDevice.CompanyID = tempCompany.CompanyID;
    currentDevice.StatusID = 1;
    currentDevice.Retries = 0;
    currentDevice.ControllerFamilyID = 1;
    if (currentDevice.EWBFrontPanelMsgOption == null) // set the panel option to the default if it isn't set already
        currentDevice.EWBFrontPanelMsgOption = context.EWBFrontPanelMsgOptions.Where(i => i.OptionDescription.Contains("default")).Single();
    // link the device to the existing address, as long as it is actually an address
    if (addressMap.TryGetValue(GetAddressIdentifier(device.address), out tempAddress))
    {
        if (GetAddressIdentifier(device.address) != "")
            currentDevice.Address = tempAddress;
        else
            currentDevice.Address = null;
    }
    else // insert a new Address and link the device to it (if not null)
    {
        if (GetAddressIdentifier(device.address) == "")
            currentDevice.Address = null;
        else
        {
            tempAddress = new Address();
            tempAddress.Address1 = device.address.Address1;
            tempAddress.Address2 = device.address.Address2;
            tempAddress.Address3 = device.address.Address3;
            tempAddress.Address4 = device.address.Address4;
            tempAddress.City = device.address.City;
            tempAddress.Country = device.address.Country;
            tempAddress.PostalCode = device.address.PostalCode;
            tempAddress.State = device.address.State;
            addresses.AddObject(tempAddress);
            addressMap.Add(GetAddressIdentifier(tempAddress), tempAddress);
            currentDevice.Address = tempAddress;
        }
    }
    if (!deviceMap.ContainsKey(device.SerialNumber)) // if inserting, add to context
    {
        oldDevices.AddObject(currentDevice);
        deviceMap.Add(device.SerialNumber, currentDevice);
    }
}
context.SaveChanges();

Although it doesn't cover your exact problem, this thread helped me improve the performance of my database import immensely, and I recommend you read it.
If you have a lot of hashing, the Task Parallel Library could help. We also use hash maps to map IDs and it helped a lot. But I recommend you lock{} around SaveChanges() so you don't run into concurrency issues (because of this, the TPL only helps when you hash and convert a lot; in our case it helped considerably, because we had to do quite a bit of parsing).
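
For illustration, a minimal sketch of that pattern, reusing context and devices from the question; ParseAndHash and ApplyToContext are hypothetical stand-ins for the CPU-bound conversion work and for the dictionary lookups/AddObject calls (Parallel.ForEach lives in System.Threading.Tasks):

var gate = new object();
Parallel.ForEach(devices, device =>
{
    var parsed = ParseAndHash(device); // hypothetical: the parsing/hashing that parallelizes well
    lock (gate)
    {
        // the EF context is not thread-safe, so everything that touches it stays locked
        ApplyToContext(context, parsed); // hypothetical: map lookups, AddObject, etc.
    }
});
lock (gate)
{
    context.SaveChanges(); // lock around SaveChanges, as suggested above
}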

Related

Comparing two lists to see if I need to update the database or even add to

I have a simple class which holds a primary key whose type I don't know until runtime, since I'm getting the data from COM. It will be either an int or a string.
I basically just need to fill up my toUpdateList and toAddList. The code below worked fine with a small number of records to play around with, but now mongoDBList returns around 65k records, everything has become very slow, and it takes 15+ minutes to resolve toUpdateList.
I'm pretty new to C#, so I'm likely missing something.
I basically just need to compare one list to the other and see if the RecordRevision is higher on the incoming side. The toAddList is pretty simple: if a record doesn't exist yet, it needs to be added.
Thanks for looking, I appreciate it!
class KeyRevision
{
    public dynamic RecordRevision;
    public dynamic PrimaryKey;
}

List<KeyRevision> potentialUpdateList = new List<KeyRevision>();
List<KeyRevision> mongoDBList = new List<KeyRevision>();
List<KeyRevision> toUpdateList = new List<KeyRevision>();
List<KeyRevision> toAddList = new List<KeyRevision>();
var sql = env.ExecuteSQL(sqlQuery);
sql.First();
// Loop over the SQL records and collect them
do
{
    if (sql.RecordCount > 0)
    {
        //Console.WriteLine(sql.GetPropertyValue(primaryKey).ToString() + " : " + sql.RecordRevision);
        var o = new KeyRevision();
        o.PrimaryKey = sql.GetPropertyValue(primaryKey);
        o.RecordRevision = sql.RecordRevision;
        potentialUpdateList.Add(o);
    }
    sql.Next();
} while (!sql.EOF);
// Ask mongo for docs
var docs = collection1.Find(_ => true).Project("{RecordRevision: 1}").ToList();
// Get them into our type
mongoDBList = docs.ConvertAll(x => new KeyRevision()
{
    PrimaryKey = x.GetValue("_id"),
    RecordRevision = x.GetValue("RecordRevision")
});
// Finds which records we need to update
toUpdateList = potentialUpdateList.Where(x =>
    mongoDBList.Any(y => y.PrimaryKey == x.PrimaryKey && y.RecordRevision < x.RecordRevision)).ToList();
// Finds the records we need to add
toAddList = potentialUpdateList.Where(x =>
    mongoDBList.FindIndex(y => y.PrimaryKey == x.PrimaryKey) < 0).ToList();
Console.WriteLine($"{toUpdateList.Count} need to be updated");
Console.WriteLine($"{toAddList.Count} need to be added");
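
For what it's worth, a minimal sketch of the usual fix for this shape of problem: index the Mongo side once in a dictionary so each probe is O(1), instead of rescanning mongoDBList for every SQL record (which is what makes the two LINQ queries above quadratic). It assumes the key and revision values compare by value on both sides; since the Mongo side holds driver values (from GetValue) and the SQL side holds COM values, you may need to convert both keys to a common type such as string first:

// Build the lookup once (O(m)), then probe it once per SQL record (O(n)).
var mongoByKey = new Dictionary<object, KeyRevision>();
foreach (var m in mongoDBList)
    mongoByKey[(object)m.PrimaryKey] = m;

foreach (var candidate in potentialUpdateList)
{
    KeyRevision existing;
    if (!mongoByKey.TryGetValue((object)candidate.PrimaryKey, out existing))
        toAddList.Add(candidate);                    // not in Mongo yet
    else if (existing.RecordRevision < candidate.RecordRevision)
        toUpdateList.Add(candidate);                 // newer revision incoming
}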

Dealing with transactions and arrays with Firebase database

I am saving leaderboard data using the "save data as transactions" approach.
I am looking for a way to replace the array numbering in Firebase transactions with the userId, so that I can update a user's information.
Code:
private TransactionResult AddScoreTransaction(MutableData mutableData)
{
    playerNewGlobalScore = false;
    List<object> leaders = mutableData.Value as List<object>;
    if (leaders == null)
    {
        leaders = new List<object>();
    }
    else if (mutableData.ChildrenCount >= LeaderBoardManager.Instance.MaxScoreRows)
    {
        // If the current list of scores is greater than or equal to our maximum allowed number,
        // we see if the new score should be added, and remove the lowest existing score.
        long minScore = long.MaxValue;
        object minVal = null;
        foreach (var child in leaders)
        {
            if (!(child is Dictionary<string, object>))
                continue;
            long childScore = (long)((Dictionary<string, object>)child)["score"];
            if (childScore < minScore)
            {
                minScore = childScore;
                minVal = child;
            }
        }
        // If the new score is lower than the current minimum, we abort.
        if (minScore > Score)
        {
            return TransactionResult.Abort();
        }
        // Otherwise, we remove the current lowest to be replaced with the new score.
        leaders.Remove(minVal);
    }
    // Now we add the new score as a new entry that contains the email address and score.
    Dictionary<string, object> newScoreMap = new Dictionary<string, object>();
    newScoreMap["name"] = Name;
    newScoreMap["country"] = Country;
    newScoreMap["photoUrl"] = PhotoUrl;
    newScoreMap["level"] = Level;
    newScoreMap["userId"] = UserId;
    newScoreMap["score"] = Score;
    leaders.Add(newScoreMap);
    // You must set the Value to indicate data at that location has changed.
    mutableData.Value = leaders;
    playerNewGlobalScore = true;
    return TransactionResult.Success(mutableData);
}
Real time database example:
https://cloud.google.com/support#support-plans
Firebase does not officially support C#, and they are happy about that (see https://stackshare.io/stackups/firebase-vs-pusher-vs-signalr); Firestore does.
They suggest you go to https://groups.google.com/forum/#!forum/firebase-talk
However, https://github.com/step-up-labs/firebase-database-dotnet from Step Up Labs, Inc. (use the Firebase NuGet package) is useful for all the things you want to do over the REST API (https://firebase.google.com/docs/reference/rest/database).
Deep in the docs is this: "Conditional requests, the REST equivalent of SDK Transaction Operations, update data according to a certain condition. See an overview of the workflow and learn more about conditional requests for REST in Saving Data." That can be found at https://firebase.google.com/docs/database/rest/save-data#section-conditional-requests
Hope that is of some help.
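
The conditional-request flow those docs describe is ETag-based; here is a minimal sketch of it over raw HttpClient (the database URL, path, and payload are made up for illustration):

using System;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;

class ConditionalWriteDemo
{
    static async Task Main()
    {
        var http = new HttpClient();
        var url = "https://your-db.firebaseio.com/leaders/someUserId.json"; // hypothetical path

        // 1. Read the current value and ask the server to include its ETag.
        var get = new HttpRequestMessage(HttpMethod.Get, url);
        get.Headers.Add("X-Firebase-ETag", "true");
        var getResp = await http.SendAsync(get);
        var etag = getResp.Headers.GetValues("ETag").First();

        // 2. Write back only if nothing has changed since the read.
        var put = new HttpRequestMessage(HttpMethod.Put, url)
        {
            Content = new StringContent("{\"score\": 1500}") // hypothetical payload
        };
        put.Headers.TryAddWithoutValidation("if-match", etag);
        var putResp = await http.SendAsync(put);

        if (putResp.StatusCode == HttpStatusCode.PreconditionFailed)
        {
            // Someone else wrote concurrently: re-read and retry,
            // which is what an SDK transaction does for you.
        }
    }
}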

Cannot update lead record in CRM?

My code does not seem to be updating the Lead entity. I have created a new Lead(), as you can see in the code, and nothing is updated when I save the record. Is there something I am doing wrong? This way of updating should be just as sufficient as doing crmContext.UpdateObject(oLead); crmContext.SaveChanges();. Can someone please help me figure out why this is not updating the lead? There is a similar question that refers to the basic plugin, but that one deals with create, whereas I am updating.
private List<Product> GetSectionAProducts()
{
    List<Product> products = new List<Product>();
    var QEproduct = new QueryExpression("product");
    // Add all columns to QEproduct.ColumnSet
    QEproduct.ColumnSet.AllColumns = true;
    // Define filter QEproduct.Criteria
    QEproduct.Criteria.AddCondition("productnumber", ConditionOperator.BeginsWith, "MGIP");
    var QEproduct_Criteria_0 = new FilterExpression();
    QEproduct.Criteria.AddFilter(QEproduct_Criteria_0);
    // Define filter QEproduct_Criteria_0
    QEproduct_Criteria_0.AddCondition("capg_billingtimeframe", ConditionOperator.Equal, (int)capg_billingcycletype.Monthly);
    QEproduct_Criteria_0.AddCondition("capg_mincriteria", ConditionOperator.NotNull);
    QEproduct_Criteria_0.AddCondition("capg_maxcriteria", ConditionOperator.NotNull);
    QEproduct_Criteria_0.AddCondition("price", ConditionOperator.NotNull);
    EntityCollection results = this.OrganizationService.RetrieveMultiple(QEproduct);
    if (results.Entities != null)
    {
        // Retrieve all records from the result set.
        foreach (Entity product in results.Entities)
        {
            products.Add(new Product
            {
                Id = product.Id,
                capg_MinCriteria = (int?)product.Attributes["capg_mincriteria"],
                capg_MaxCriteria = (int?)product.Attributes["capg_maxcriteria"],
                Price = (Money)product.Attributes["price"]
            });
        }
    }
    return products;
}

var duesproduct = sectionA.Where(o => o.capg_MinCriteria.Value <= dues).ToList().OrderByDescending(o => o.capg_MaxCriteria).FirstOrDefault();
if (duesproduct != null)
{
    Xrm.Lead oLead = new Lead();
    oLead.Id = this.InputTargetEntity.Id;
    oLead.capg_CalculatedDuesBilling = new Money(duesproduct.Price == null ? 0 : duesproduct.Price.Value);
    if (duesproduct.capg_MaxCriteria <= 100000)
    {
        oLead.capg_CalculatedDuesBilling = new Money(Math.Round((duesproduct.Price == null ? 0 : duesproduct.Price.Value) * new decimal(0.0290), 2));
    }
    if (duesproduct.capg_MaxCriteria <= 235000)
    {
        oLead.capg_CalculatedDuesBilling = new Money(Math.Round((duesproduct.Price == null ? 0 : duesproduct.Price.Value) * new decimal(0.0262), 2));
    }
    this.OrganizationService.Update(oLead);
}
I highly recommend updating records with IOrganizationService.Update directly, not through the context. The context is handy for queries, because you can use LINQ, but when you update entities via the context the update may be sent to CRM with all the entity attributes you selected, and that can cause weird behavior because it might trigger unexpected workflows. It also fills the Audit History really quickly.
It is much better to create a new instance of the lead, populate it with only the Id and the attributes you want to update, and call service.Update.
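
A minimal sketch of that pattern, with names borrowed from the question and a made-up example value:

// Send only the Id plus the attributes you actually want to change.
var lead = new Xrm.Lead { Id = this.InputTargetEntity.Id };
lead.capg_CalculatedDuesBilling = new Money(123.45m); // example value only
this.OrganizationService.Update(lead);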

Why does this EF upsert logic result in deletions instead of updates?

The following code results in deletions instead of updates.
My question is: is this a bug in the way I'm coding against Entity Framework, or should I suspect something else?
Update: I got this working, but I'm leaving the question up with both the original and the working versions in hopes that I can learn something I didn't understand about EF.
In the original, non-working code below, when the database is fresh all the additions of SearchDailySummary objects succeed, but on the second run, when my code was supposedly going to perform the update, the net result is a once-again empty table in the database, i.e. this logic manages to be the equivalent of removing each entity.
//Logger.Info("Upserting SearchDailySummaries..");
using (var db = new ClientPortalContext())
{
    foreach (var item in items)
    {
        var campaignName = item["campaign"];
        var pk1 = db.SearchCampaigns.Single(c => c.SearchCampaignName == campaignName).SearchCampaignId;
        var pk2 = DateTime.Parse(item["day"].Replace('-', '/'));
        var source = new SearchDailySummary
        {
            SearchCampaignId = pk1,
            Date = pk2,
            Revenue = decimal.Parse(item["totalConvValue"]),
            Cost = decimal.Parse(item["cost"]),
            Orders = int.Parse(item["conv1PerClick"]),
            Clicks = int.Parse(item["clicks"]),
            Impressions = int.Parse(item["impressions"]),
            CurrencyId = item["currency"] == "USD" ? 1 : -1 // NOTE: non-USD (if it exists) is -1 for now
        };
        var target = db.Set<SearchDailySummary>().Find(pk1, pk2) ?? new SearchDailySummary();
        if (db.Entry(target).State == EntityState.Detached)
        {
            db.SearchDailySummaries.Add(target);
            addedCount++;
        }
        else
        {
            // TODO?: compare source and target and change the entity state to unchanged if no diff
            updatedCount++;
        }
        AutoMapper.Mapper.Map(source, target);
        itemCount++;
    }
    Logger.Info("Saving {0} SearchDailySummaries ({1} updates, {2} additions)", itemCount, updatedCount, addedCount);
    db.SaveChanges();
}
Here is the working version. I'm not 100% sure it's optimized, but it's working reliably and performing fine, as long as I batch it out in groups of 500 or fewer items at a time; after that it slows down exponentially, but I think that may be a different question/subject...
//Logger.Info("Upserting SearchDailySummaries..");
using (var db = new ClientPortalContext())
{
    foreach (var item in items)
    {
        var campaignName = item["campaign"];
        var pk1 = db.SearchCampaigns.Single(c => c.SearchCampaignName == campaignName).SearchCampaignId;
        var pk2 = DateTime.Parse(item["day"].Replace('-', '/'));
        var source = new SearchDailySummary
        {
            SearchCampaignId = pk1,
            Date = pk2,
            Revenue = decimal.Parse(item["totalConvValue"]),
            Cost = decimal.Parse(item["cost"]),
            Orders = int.Parse(item["conv1PerClick"]),
            Clicks = int.Parse(item["clicks"]),
            Impressions = int.Parse(item["impressions"]),
            CurrencyId = item["currency"] == "USD" ? 1 : -1 // NOTE: non-USD (if it exists) is -1 for now
        };
        var target = db.Set<SearchDailySummary>().Find(pk1, pk2);
        if (target == null)
        {
            db.SearchDailySummaries.Add(source);
            addedCount++;
        }
        else
        {
            AutoMapper.Mapper.Map(source, target);
            db.Entry(target).State = EntityState.Modified;
            updatedCount++;
        }
        itemCount++;
    }
    Logger.Info("Saving {0} SearchDailySummaries ({1} updates, {2} additions)", itemCount, updatedCount, addedCount);
    db.SaveChanges();
}
The thing that keeps popping up in my mind is that maybe the Entry(entity) or Find(pk) method has some side effects? I should probably be consulting the documentation, but any advice is appreciated.
It's a slight assumption on my part (without looking into your models/entities), but have a look at what's going on within this block (and see if the objects being attached here are related to the deletions):
if (db.Entry(target).State == EntityState.Detached)
{
    db.SearchDailySummaries.Add(target);
    addedCount++;
}
Your detached object won't be able to use its navigation properties to locate its related objects; you'll be re-attaching an object in a potentially conflicting state (without the correct relationships).
You haven't mentioned what is being deleted above, so I may be way off. I'm just off out, so this is a little rushed; hope there's something useful in there.

How can I make this code faster? List<custom_class>.Find(lambda_expression)

I need to match email sends with email bounces so I can determine whether each send was delivered or not. The catch is, I have to limit the bounce to within 4 days of the send to avoid matching the wrong send to a bounce. Send records are spread over a period of 30 days.
LinkedList<event_data> sent = GetMyHugeListOfSends(); // for example 1M+ records
List<event_data> bounced = GetMyListOfBounces(); // for example 150k records
bounced = bounced.OrderBy(o => o.event_date).ToList(); // this ensures the most accurate match of bounce to send (since we find the first match)
List<event_data> delivered = new List<event_data>();
event_data deliveredEmail = new event_data();
foreach (event_data sentEmail in sent)
{
    event_data bounce = bounced.Find(item => item.email.ToLower() == sentEmail.email.ToLower() && item.event_date > sentEmail.event_date && item.event_date < sentEmail.event_date.AddDays(deliveredCalcDelayDays));
    // create delivered records
    if (bounce != null)
    {
        // there was a bounce! don't add a delivered record!
        // remove the bounce, it only applies to one send!
        bounced.Remove(bounce);
    }
    else
    {
        // if sent is not bounced, it's delivered
        deliveredEmail.sid = siteid;
        deliveredEmail.mlid = mlid;
        deliveredEmail.mid = mid;
        deliveredEmail.email = sentEmail.email;
        deliveredEmail.event_date = sentEmail.event_date;
        deliveredEmail.event_status = "Delivered";
        deliveredEmail.event_type = "Delivered";
        deliveredEmail.id = sentEmail.id;
        deliveredEmail.number = sentEmail.number;
        deliveredEmail.laststoretransaction = sentEmail.laststoretransaction;
        delivered.Add(deliveredEmail); // add the new delivered record
        deliveredEmail = new event_data();
    }
    if (bounced.Count == 0)
    {
        break; // no more bounces to match!
    }
}
So I did some testing and it's processing about 12 sent records per second. At 1M+ records, it will take 25+ hours to process!
Two questions:
How can I find the exact line that is taking the most time?
I am assuming it's the lambda expression finding the bounce that is taking the longest since this was much faster before I put that in there. How can I speed this up?
Thanks!
Edit
---Ideas---
One idea I just had is to sort the sends by date like I did the bounces so that the search through the bounces will be more efficient, since an early send would be likely to hit an early bounce as well.
Another idea I just had is to run a couple of these processes in parallel, although I would hate to multi-thread this simple application.
I would be reasonably confident in saying that, yes, it is your Find that is taking the time.
It looks like you are certain the Find will return 0 or 1 records only (not a list), in which case the way to speed this up is to create a lookup (a dictionary): instead of a List<event_data> for your bounced var, create a Dictionary<key, event_data>, then you can just look the value up by key instead of doing a Find.
The trick is in creating your key (I don't know enough about your app to help with that), but it is essentially the same criteria that is in your Find.
EDIT (adding some pseudo-code):
void Main()
{
    var hugeListOfEmails = GetHugeListOfEmails();
    var allBouncedEmails = GetAllBouncedEmails();
    IDictionary<string, EmailInfo> bouncedLookup = CreateLookupOfBouncedEmails(allBouncedEmails);
    foreach (var info in hugeListOfEmails)
    {
        if (bouncedLookup.ContainsKey(info.emailAddress))
        {
            // Email is bounced
        }
        else
        {
            // Email is not bounced
        }
    }
}

public IEnumerable<EmailInfo> GetHugeListOfEmails()
{
    yield break;
}

public IEnumerable<EmailInfo> GetAllBouncedEmails()
{
    yield break;
}

public IDictionary<string, EmailInfo> CreateLookupOfBouncedEmails(IEnumerable<EmailInfo> emailList)
{
    var result = new Dictionary<string, EmailInfo>();
    foreach (var e in emailList)
    {
        if (!result.ContainsKey(e.emailAddress))
        {
            if (SatisfiesDateConditions(e)) // placeholder for your date checks
            {
                result.Add(e.emailAddress, e);
            }
        }
    }
    return result;
}

public class EmailInfo
{
    public string emailAddress { get; set; }
    public DateTime DateSent { get; set; }
}
You should improve this by using the ToLookup method to create a lookup table keyed on the email address:
var bouncedLookup = bounced.ToLookup(k => k.email.ToLower());
and use this in the loop to look up by email first:
var filteredBounced = bouncedLookup[sentEmail.email.ToLower()];
// mini optimisation here
var endDate = sentEmail.event_date.AddDays(deliveredCalcDelayDays);
event_data bounce = filteredBounced.FirstOrDefault(item => item.event_date > sentEmail.event_date && item.event_date < endDate);
I could not compile it, but I think that should do. Please try it.
You are finding items in a list, which means it has to traverse the whole list, so it is an O(n) operation. Could you not store those sent emails in a Dictionary, with the key being the email address you are searching on? Then go through the bounces, linking back to the emails in the dictionary. The lookup will be constant time, and you go through the bounces once, so it is O(n) overall. Your current method is O(n²).
Converting bounced to a SortedList might be a good solution:
SortedList<string, event_data> sl = new SortedList<string, event_data>(
    bounced.ToDictionary(s => s.email, s => s), // note: assumes one bounce per email address
    StringComparer.OrdinalIgnoreCase);
and to find a bounce, look it up by its email key:
event_data bounce;
sl.TryGetValue(sentEmail.email, out bounce); // case-insensitive thanks to the comparer
There's another concern about your code that I want to point out: memory consumption. I don't know your machine configuration, but here are some thoughts about the code:
Initially you are allocating space for 1.2M+ objects of the event_data type. I can't see the full event_data type definition, but assuming the emails are all unique, and seeing that the type has quite a few properties, I can assume such a collection is rather heavy (possibly hundreds of megabytes).
Next you are allocating another batch of event_data objects (almost 1M, if I've counted right). It's getting even heavier in terms of memory consumption.
I don't know about the other objects in your application's data model, but considering everything I've mentioned, you can easily get close to the memory limit for a 32-bit process and thus force the GC to work very often. In fact, you could easily have a GC collection after each call to bounced.Remove(bounce), and that would significantly slow down your app.
So even if you have plenty of memory left and/or your app is 64-bit, I would try to minimize memory consumption; I'm pretty sure it would make your code run faster. For example, you could do complete processing of each deliveredEmail without storing it, or load your initial event_data in chunks, etc.
On consideration, the number of bounces is relatively small, so why not pre-optimise the bounce lookup as much as possible? This code makes a delegate for each possible bounce and groups them into a dictionary for access by the email key:
private static bool DateInRange(
    DateTime sentDate,
    DateTime bouncedDate,
    int deliveredCalcDelayDays)
{
    if (bouncedDate <= sentDate)
    {
        return false;
    }
    return bouncedDate < sentDate.AddDays(deliveredCalcDelayDays);
}

static IEnumerable<event_data> GetDeliveredMails(
    IEnumerable<event_data> sent,
    IEnumerable<event_data> bounced,
    int siteId,
    int mlId,
    int mId,
    int deliveredCalcDelayDays)
{
    var grouped = bounced.GroupBy(
        b => b.email.ToLowerInvariant());
    var lookup = grouped.ToDictionary(
        g => g.Key,
        g => g.OrderBy(e => e.event_date).Select(
            e => new Func<DateTime, bool>(
                s => DateInRange(s, e.event_date, deliveredCalcDelayDays))).ToList());
    foreach (var s in sent)
    {
        var key = s.email.ToLowerInvariant();
        List<Func<DateTime, bool>> checks;
        if (lookup.TryGetValue(key, out checks))
        {
            var match = checks.FirstOrDefault(c => c(s.event_date));
            if (match != null)
            {
                checks.Remove(match);
                continue;
            }
        }
        yield return new event_data
        {
            sid = siteId,
            mlid = mlId,
            mid = mId,
            email = s.email,
            event_date = s.event_date,
            event_status = "Delivered",
            event_type = "Delivered",
            id = s.id,
            number = s.number,
            laststoretransaction = s.laststoretransaction
        };
    }
}
You could try pre-compiling the delegates in lookup if this is not fast enough.
OK, the final solution I found was a Dictionary for the bounces.
The sent LinkedList was sorted by sent date so it would loop through in chronological order. That's important, because I have to match the right send to the right bounce.
I made a Dictionary<string, List<event_data>>, so the key was the email and the value was a list of all the event_data bounces for that email address. The list was sorted by event_date, since I wanted to make sure the first bounce was matched to the send.
Final result: it went from processing 700 records/minute to 500k+ records/second.
Here is the final code:
LinkedList<event_data> sent = GetMyHugeListOfSends();
IEnumerable<event_data> sentOrdered = sent.OrderBy(send => send.event_date);
Dictionary<string, List<event_data>> bounced = GetMyListOfBouncesAsDictionary();
List<event_data> delivered = new List<event_data>();
event_data deliveredEmail = new event_data();
List<event_data> bounces = null;
bool matchedBounce = false;
foreach (event_data sentEmail in sentOrdered)
{
    matchedBounce = false;
    // create delivered records
    if (bounced.TryGetValue(sentEmail.email, out bounces))
    {
        // there was a bounce! find out if it was within 4 days after the send!
        foreach (event_data bounce in bounces)
        {
            if (bounce.event_date > sentEmail.event_date &&
                bounce.event_date <= sentEmail.event_date.AddDays(4))
            {
                matchedBounce = true;
                // remove the record because a bounce can only match once back to a send
                bounces.Remove(bounce);
                if (bounces.Count == 0) // no more bounces for this email
                {
                    bounced.Remove(sentEmail.email);
                }
                break;
            }
        }
        if (matchedBounce == false) // no matching bounces in the list!
        {
            // if sent is not bounced, it's delivered
            deliveredEmail.sid = siteid;
            deliveredEmail.mlid = mlid;
            deliveredEmail.mid = mid;
            deliveredEmail.email = sentEmail.email;
            deliveredEmail.event_date = sentEmail.event_date;
            deliveredEmail.event_status = "Delivered";
            deliveredEmail.event_type = "Delivered";
            deliveredEmail.id = sentEmail.id;
            deliveredEmail.number = sentEmail.number;
            deliveredEmail.laststoretransaction = sentEmail.laststoretransaction;
            delivered.Add(deliveredEmail); // add the new delivered record
            deliveredEmail = new event_data();
        }
    }
    else
    {
        // if sent is not bounced, it's delivered
        deliveredEmail.sid = siteid;
        deliveredEmail.mlid = mlid;
        deliveredEmail.mid = mid;
        deliveredEmail.email = sentEmail.email;
        deliveredEmail.event_date = sentEmail.event_date;
        deliveredEmail.event_status = "Delivered";
        deliveredEmail.event_type = "Delivered";
        deliveredEmail.id = sentEmail.id;
        deliveredEmail.number = sentEmail.number;
        deliveredEmail.laststoretransaction = sentEmail.laststoretransaction;
        delivered.Add(deliveredEmail); // add the new delivered record
        deliveredEmail = new event_data();
    }
    if (bounced.Count == 0)
    {
        break; // no more bounces to match!
    }
}
