C# Lucene 4.8 slow search: how to get from 400 ms down to 30 ms?

I am trying to speed up the Lucene search in my WPF application.
I hoped my search would take around 30 ms.
Only 87 items were found in the index, so that is not very much.
But a Stopwatch timer says it takes around 400 ms, which is way too much for me.
Can you check my code and tell me how I can improve it?
I also measured the time from the beginning of the try block to the foreach to rule out any big initialization cost; it is roughly 0 ms, so initialization is not the problem.
List<CardView> aAll_CardView = new List<CardView>();
try
{
    SortField field = new SortField(LUCENT_STATE_MAIN, SortFieldType.STRING);
    Sort sort = new Sort(field);
    searchManager.MaybeRefreshBlocking(); // execute with a fresh index searcher
    var searcher = searchManager.Acquire();
    var topDocs = searcher.Search(aBooleanQuery, 100, sort);
    var _totalHits = topDocs.TotalHits;
    CardView aCardView = null;
    // measured time: takes ~400-500 ms
    foreach (var result in topDocs.ScoreDocs)
    {
        #region iterate through findings and put lucene data into CardView list
        var aDoc = searcher.Doc(result.Doc);
        aAll_CardView.Add(new CardView
        {
            // all fields are defined as TextField()...
            // must be first, because used in e.g. Nr_Main
            RelatedItemCount = aDoc.Get(LUCENT_RELATED_ITEMS),
            Nr_Main = aDoc.Get(LUCENT_NR_MAIN),
            Nr_Parent = aDoc.Get(LUCENT_NR_PARENT),
            Antwort = aDoc.Get(LUCENT_ANTWORT),
            Beschreibung = aDoc.Get(LUCENT_BESCHREIBUNG),
            Note = aDoc.Get(LUCENT_NOTES),
            Question_Main = aDoc.Get(LUCENT_TITLE_MAIN),
            Question_Parent = aDoc.Get(LUCENT_TITLE_PARENT),
            Book = aDoc.Get(LUCENT_BOOK),
            Date_Create = aDoc.Get(LUCENT_DATE_CREATED),
            Date_LastEdit = aDoc.Get(LUCENT_DATE_LASTEDIT),
            Bibelstelle = aDoc.Get(LUCENT_BIBELSTELLE),
            // ParseCore just uses TryParse to get the enum for the state
            Status_Main = ParseCore(aDoc.Get(LUCENT_STATE_MAIN)),
            Status_Parent = ParseCore(aDoc.Get(LUCENT_STATE_PARENT))
        });
        #endregion
    }
}
catch (Exception e)
{
    string exp = e.ToString();
    new JMsg(exp).ShowDialog();
}
finally
{
}
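Two things stand out in the snippet: the acquired searcher is never given back to the SearcherManager (the Release call belongs in the currently empty finally block), and each searcher.Doc() call inside the loop loads all stored fields of a hit from the index, which for ~87 hits with a dozen stored fields can easily dominate the 400 ms. A minimal sketch of the usual Acquire/Release pattern for Lucene.NET 4.8, reusing aBooleanQuery and sort from the question:

IndexSearcher searcher = null;
try
{
    searchManager.MaybeRefreshBlocking();
    searcher = searchManager.Acquire();
    var topDocs = searcher.Search(aBooleanQuery, 100, sort);
    foreach (var scoreDoc in topDocs.ScoreDocs)
    {
        // Each Doc() call loads all stored fields of the hit from the index;
        // with many stored TextFields this is usually the dominant cost.
        var aDoc = searcher.Doc(scoreDoc.Doc);
        // ... map aDoc to CardView as above ...
    }
}
finally
{
    if (searcher != null)
        searchManager.Release(searcher); // never Dispose an acquired searcher yourself
}

Note also that the first search after a refresh pays one-time warm-up costs, so timing a single cold search can overstate the steady-state cost.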

Related

Why is PropertyDataCollection object persisting multiple records to database

I have a utility that reads the status of Microsoft BizTalk Server resources, specifically the ReceiveLocation component. My problem is that the program is persisting multiple entries of each item: each row in the returned data is multiplied by 25, so instead of persisting only 5 rows, 125 rows are persisted. For example, instead of just 1 row for the first result, I have 25.
This is my program :
public List<BizTalk> GetBizTalkServicesStatistics()
{
    List<BizTalk> model = new List<BizTalk>();
    try
    {
        // Create the WMI search object.
        ManagementObjectSearcher Searcher = new ManagementObjectSearcher();
        ConnectionOptions options = new ConnectionOptions
        {
            Username = "+username+",
            Password = "+password+",
            Authority = "+domain+"
        };
        var server = "+server+";
        // Create the scope node so we can set the WMI root node correctly.
        ManagementScope Scope = new ManagementScope("\\\\" + server + "\\root\\MicrosoftBizTalkServer", options);
        Searcher.Scope = Scope;
        // Build a query to enumerate the MSBTS_ReceiveLocation instances. If an argument
        // is supplied, use it to select only the matching receive location.
        //if (args.Length == 0)
        SelectQuery Query = new SelectQuery();
        Query.QueryString = "SELECT * FROM MSBTS_ReceiveLocation";
        //else
        //    Query.QueryString = "SELECT * FROM MSBTS_ReceiveLocation WHERE Name = '" + args[0] + "'";
        // Set the query for the searcher.
        Searcher.Query = Query;
        // Execute the query and determine if any results were obtained.
        ManagementObjectCollection QueryCol = Searcher.Get();
        // Use a bool to tell if we enter the loop below,
        // because the Count property is not supported.
        bool ReceiveLocationFound = false;
        // Enumerate all properties.
        foreach (ManagementBaseObject envVar in QueryCol)
        {
            // There is at least one receive location.
            ReceiveLocationFound = true;
            PropertyDataCollection envVarProperties = envVar.Properties;
            foreach (PropertyData envVarProperty in envVarProperties)
            {
                BizTalk bizTalk = new BizTalk();
                bizTalk.Name = Convert.ToString(envVar["Name"]);
                bizTalk.TransportType = Convert.ToString(envVar["AdapterName"]);
                bizTalk.Uri = Convert.ToString(envVar["InboundTransportURL"]);
                bizTalk.Status = Convert.ToString(envVar["Name"]);
                bizTalk.ReceiveHandler = Convert.ToString(envVar["HostName"]);
                bizTalk.ReceivePort = Convert.ToString(envVar["ReceivePortName"]);
                bizTalk.RunDate = DateTime.Now;
                bizTalk.ApplicationId = 24;
                bizTalk.ServerId = 8;
                bizTalk.InstanceName = "FBCZOP";
                model.Add(bizTalk);
            }
        }
        if (!ReceiveLocationFound)
        {
            Console.WriteLine("No receive locations found matching the specified name.");
        }
    }
    catch (Exception excep)
    {
        ExceptionLogger.SendErrorToText(excep);
    }
    return model;
}
Save Function
public void SaveStatistics(BizTalk entity)
{
    List<BizTalk> ServerInfo = new List<BizTalk>();
    ServerInfo = GetBizTalkServicesStatistics();
    foreach (var di in ServerInfo)
    {
        entity.RunDate = di.RunDate;
        entity.Name = di.Name;
        entity.Status = di.Status;
        entity.Uri = di.Uri;
        entity.InstanceName = di.InstanceName;
        entity.ReceivePort = di.ReceivePort;
        entity.TransportType = di.TransportType;
        entity.RunDate = DateTime.Now;
        entity.ReceiveHandler = di.ReceiveHandler;
        entity.ServerId = entity.ServerId;
        entity.ApplicationId = entity.ApplicationId;
        appEntities.BizTalk.Add(entity);
        appEntities.SaveChanges();
    }
}
When I step through the code, the envVarProperties variable shows a record count of 125 under ResultsView (screenshot: Link 1), whilst the QueryCol variable shows a count of 5 (screenshot: Link 2).
It looks like you're iterating one extra time in your GetBizTalkServicesStatistics() method.
Remove the inner foreach loop, foreach (PropertyData envVarProperty in envVarProperties). It loops through every property the object has (all 25 properties) for each instance (5 instances): 25 * 5 = 125 values retrieved. You only want to iterate through the instances and pull the properties you want; that way you end up with 5 objects in your model list.
I'd suggest something like this (untested, because I don't have BizTalk):
public List<BizTalk> GetBizTalkServicesStatistics()
{
    List<BizTalk> model = new List<BizTalk>();
    try
    {
        // Create the WMI connection options.
        ConnectionOptions options = new ConnectionOptions
        {
            Username = "+username+",
            Password = "+password+",
            Authority = "+domain+"
        };
        var server = "+server+";
        // Create the scope node so we can set the WMI root node correctly.
        ManagementScope Scope = new ManagementScope("\\\\" + server + "\\root\\MicrosoftBizTalkServer", options);
        ManagementObjectSearcher Searcher = new ManagementObjectSearcher(Scope, new ObjectQuery("SELECT * FROM MSBTS_ReceiveLocation"));
        // Enumerate the instances: one BizTalk object per instance.
        foreach (ManagementObject instance in Searcher.Get())
        {
            BizTalk bizTalk = new BizTalk();
            bizTalk.Name = instance.Properties["Name"]?.Value?.ToString();
            bizTalk.TransportType = instance.Properties["AdapterName"]?.Value?.ToString();
            bizTalk.Uri = instance.Properties["InboundTransportURL"]?.Value?.ToString();
            bizTalk.Status = instance.Properties["Name"]?.Value?.ToString();
            bizTalk.ReceiveHandler = instance.Properties["HostName"]?.Value?.ToString();
            bizTalk.ReceivePort = instance.Properties["ReceivePortName"]?.Value?.ToString();
            bizTalk.RunDate = DateTime.Now;
            bizTalk.ApplicationId = 24;
            bizTalk.ServerId = 8;
            bizTalk.InstanceName = "FBCZOP";
            model.Add(bizTalk);
        }
        if (model.Count == 0)
        {
            Console.WriteLine("No receive locations found matching the specified name.");
        }
    }
    catch (Exception excep)
    {
        ExceptionLogger.SendErrorToText(excep);
    }
    return model;
}
Also, this can be simplified further if you remove the ConnectionOptions (unless you are hard-coding credentials, which is highly advised against). If you are just using the identity of the executing user, that data is not needed.
-Paul
You are adding the same entity 25 times and overwriting its properties by reference. You need to initialize a new entity inside your loop:
foreach (var di in ServerInfo)
{
    var entity = new BizTalk();
    entity.RunDate = di.RunDate;
    entity.Name = di.Name;
    entity.Status = di.Status;
    entity.Uri = di.Uri;
    entity.InstanceName = di.InstanceName;
    entity.ReceivePort = di.ReceivePort;
    entity.TransportType = di.TransportType;
    entity.RunDate = DateTime.Now;
    entity.ReceiveHandler = di.ReceiveHandler;
    entity.ServerId = di.ServerId;           // was a self-assignment in the original
    entity.ApplicationId = di.ApplicationId; // was a self-assignment in the original
    appEntities.BizTalk.Add(entity);
    appEntities.SaveChanges();
}
As you don't show the code where SaveStatistics is called, it's not certain this will fix your whole problem, but it is at least one method that does not do what you expect it to.
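A further note on the corrected loop: calling SaveChanges() inside the foreach issues one database round trip per row. If nothing requires row-by-row commits, adding all entities first and saving once is usually noticeably faster. A sketch, assuming the same appEntities context as above:

foreach (var di in ServerInfo)
{
    appEntities.BizTalk.Add(new BizTalk
    {
        RunDate = DateTime.Now,
        Name = di.Name,
        Status = di.Status,
        Uri = di.Uri,
        InstanceName = di.InstanceName,
        ReceivePort = di.ReceivePort,
        TransportType = di.TransportType,
        ReceiveHandler = di.ReceiveHandler,
        ServerId = di.ServerId,
        ApplicationId = di.ApplicationId
    });
}
appEntities.SaveChanges(); // one round trip commits all rows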

Insert parent-child rows using Parallel programming

I have a method that iterates rows in a for loop and, inside that loop, calls the DNN API to insert a parent and child row into the database. At most 1,000 rows are inserted per execution. Tested on my local machine, the performance is very slow. I am thinking of using parallel programming, but I am not sure how to implement it, since the insertions depend on each other through the parent-child relationship. I am also not sure whether parallel programming is a good approach here at all. Any help, please?
Sample code
private void AddTab()
{
    for (int i = 0; i < 1000; i++)
    {
        TabController tabController = new TabController();
        var portalSettings = new DotNetNuke.Entities.Portals.PortalSettings(info.PortalId);
        TabInfo tab = new TabInfo();
        tab.PortalID = info.PortalId;
        tab.TabName = info.TabName;
        tab.Title = info.Title;
        tab.Description = info.TabName;
        tab.KeyWords = info.TabName;
        tab.IsVisible = info.IsVisible;
        tab.DisableLink = info.IsDisabled;
        tab.ParentId = info.ParentId == null ? Null.NullInteger : info.ParentId.GetValueOrDefault();
        tab.IsDeleted = false;
        tab.Url = "";
        tab.SkinSrc = "[G]Skins/HRT.Portal.DNNThemes.Default/Home.ascx";
        tab.ContainerSrc = portalSettings.DefaultPortalContainer;
        tab.IsSuperTab = false;
        var parentPage = tabController.GetTab(portalSettings.HomeTabId, info.PortalId);
        // Clone the parent page permissions.
        foreach (TabPermissionInfo permission in parentPage.TabPermissions.ToList())
        {
            tab.TabPermissions.Add(permission);
        }
        int tabId = tabController.AddTab(tab, true);
    }
}
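Before parallelizing, it may be worth hoisting the loop-invariant work: TabController, PortalSettings, and the parent-page lookup produce the same values on every iteration, so creating them once removes a GetTab call and several allocations per row. A sketch under that assumption, with info as in the question:

private void AddTab()
{
    // Loop-invariant: create once instead of 1000 times.
    var tabController = new TabController();
    var portalSettings = new DotNetNuke.Entities.Portals.PortalSettings(info.PortalId);
    var parentPage = tabController.GetTab(portalSettings.HomeTabId, info.PortalId);
    var parentPermissions = parentPage.TabPermissions.ToList();

    for (int i = 0; i < 1000; i++)
    {
        TabInfo tab = new TabInfo();
        // ... set the tab fields from info exactly as in the question ...
        foreach (TabPermissionInfo permission in parentPermissions)
        {
            tab.TabPermissions.Add(permission);
        }
        int tabId = tabController.AddTab(tab, true);
    }
}

Whether AddTab itself is safe to call in parallel depends on DNN's internals; since each child needs its parent's tabId, one pragmatic pattern is to insert tree levels sequentially and parallelize only within a level.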

Possible Leak when reading content?

I created a tool which iterates through all commits of a repository.
It diffs each commit against all of its parents and then reads the content for some checks.
It turns out that this slows down very quickly. It always slows down after the same specific commit, which is quite large, as it is a merge commit.
Here is how I iterate through the commits (the following code is slightly simplified for focus):
var repo = new Repository(path);
foreach (LibGit2Sharp.Commit commit in repo.Commits)
{
    IEnumerable<FileChanges> changed = repo.GetChangedToAllParents(commit);
    var files = ResolveChangeFileInfos(changed);
    var entry = new Commit(commit.Id.ToString(), commit.Author.Email, commit.Committer.When, commit.Message, files);
    yield return entry;
}
In GetChangedToAllParents I basically make a diff for each parent, like this:
foreach (var parent in commit.Parents)
{
    var options = new CompareOptions
    {
        Algorithm = DiffAlgorithm.Minimal,
        IncludeUnmodified = false
    };
    var patches = repo.Diff.Compare<Patch>(parent.Tree, commit.Tree, options); // difference
}
and later I read the content of the files in this way:
var blob = repo.Lookup<Blob>(patchEntry.Oid.Sha); // find the blob
Stream contentStream = blob.GetContentStream();
string result = null;
using (var tr = new StreamReader(contentStream, Encoding.UTF8))
{
    result = tr.ReadToEnd();
}
Are there any known issues? Am I missing a leak somewhere?
Update
I found out that most of the time (about 90%) is spent in the diff, and it gets constantly slower:
var options = new CompareOptions
{
    Algorithm = DiffAlgorithm.Minimal,
    IncludeUnmodified = false
};
var patches = repo.Diff.Compare<Patch>(parent.Tree, commit.Tree, options); // difference
I can reproduce it with this code:
var repo = new Repository(path);
int pos = 0;
foreach (var commit in repo.Commits)
{
    pos++;
    if (pos % 100 == 0)
    {
        Console.WriteLine(pos);
    }
    var options = new CompareOptions
    {
        Algorithm = DiffAlgorithm.Minimal,
        IncludeUnmodified = false,
        Similarity = new SimilarityOptions
        {
            RenameDetectionMode = RenameDetectionMode.None,
            WhitespaceMode = WhitespaceMode.IgnoreAllWhitespace
        }
    };
    foreach (var parent in commit.Parents)
    {
        var changedFiles =
            repo.Diff.Compare<TreeChanges>(parent.Tree, commit.Tree, options).ToList();
    }
}
It reserves about 500 MB for every 1,000 commits, and at some point it just crashes. So I also posted it here:
https://github.com/libgit2/libgit2sharp/issues/1359
Is there a faster way to get all files that were changed in a specific commit?
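One thing to check before suspecting libgit2 itself: in recent LibGit2Sharp versions, the diff results (TreeChanges and Patch) wrap native libgit2 memory and implement IDisposable, and nothing in the repro loop disposes them. If the version in use supports it, a using block per comparison should keep memory flat; a sketch of the repro loop under that assumption:

foreach (var commit in repo.Commits)
{
    foreach (var parent in commit.Parents)
    {
        // Dispose the native diff memory before the next iteration
        // instead of waiting for the finalizer.
        using (var changes = repo.Diff.Compare<TreeChanges>(parent.Tree, commit.Tree, options))
        {
            var changedFiles = changes.ToList();
        }
    }
}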

Parallel task on tree structure not complete

I am very new to .NET task parallelism. The objective is to walk through a tree structure where each branch is composed of one parent node, one child node, and one operation node (like a weight), and for each node to create an extension object and save it to the database. I followed a possible duplicate conversation, but my observation is that the tree is not walked completely: the process exits early, unexpectedly. Following is my code:
public void InitializeScheduleVariables_Parallel(IResource ANode, double aNumRequired, double aBatchRequired, double aAcceptProbability, AppContext aAppContext, bool ARecursively = true)
{
    var LTasks = new List<Task>();
    var LUser = aAppContext.LocalContext.User;
    LTasks.Add(Task.Factory.StartNew(() =>
    {
        var LNewContext = new AppContext(new DbContext(new Context(LUser)));
        var LNewRep = new ResourceRepository(LNewContext);
        ANode = LNewRep.Get(ANode.Id);
        ANode.ResourceInstance_Create(); // Create the ResourceInstance on the Resource if it does not already exist.
        ANode.ResourceInstance.Required = aNumRequired;
        ANode.ResourceInstance.ScheduleSource = ResourceInstance.ScheduleSourceEnum.Undefined;
        ANode.ResourceInstance.ScheduleState = ResourceInstance.ScheduleStateEnum.Unscheduled;
        ANode.ResourceInstance.ScheduleMode = ResourceInstance.ScheduleModeEnum.Undefined;
        ANode.ResourceInstance.BatchRequired = aBatchRequired;
        ANode.ResourceInstance.ProbabilityOfCompletion = aAcceptProbability;
        ANode.ResourceInstance.Save();
    }));
    if (ARecursively)
    {
        foreach (AssemblyLink LAssembly in ANode.GetOutEdges())
        {
            LTasks.Add(Task.Factory.StartNew(() =>
            {
                // Set the variables for the production operations as well.
                IOperationResource LOperation = LAssembly.Operation;
                if (LOperation != null)
                {
                    var LNewContext = new AppContext(new DbContext(new Context(LUser)));
                    var LNewRep = new OperationResourceRepository(LNewContext);
                    LOperation = LNewRep.Get(LOperation.Id);
                    LOperation.ResourceInstance_Create(); // Create the ResourceInstance on the Resource if it does not already exist.
                    LOperation.ResourceInstance.Required = aNumRequired / LAssembly.OutputQuantity;
                    LOperation.ResourceInstance.BatchRequired = aBatchRequired / LAssembly.OutputQuantity;
                    LOperation.ResourceInstance.ScheduleSource = ResourceInstance.ScheduleSourceEnum.Undefined;
                    LOperation.ResourceInstance.ScheduleState = ResourceInstance.ScheduleStateEnum.Unscheduled;
                    LOperation.ResourceInstance.ScheduleMode = ResourceInstance.ScheduleModeEnum.Undefined;
                    LOperation.ResourceInstance.ProbabilityOfCompletion = aAcceptProbability;
                    LOperation.ResourceInstance.Save();
                }
            }));
            LTasks.Add(Task.Factory.StartNew(() =>
            {
                // Recursively set child nodes.
                IResource LChildNode = LAssembly.Child;
                double LNumRequired_Child = aNumRequired * LAssembly.InputQuantity / LAssembly.OutputQuantity;
                double LNumBatchRequired_Child = LChildNode.Quantity * LAssembly.InputQuantity / LAssembly.OutputQuantity;
                InitializeScheduleVariables_Parallel(LChildNode, LNumRequired_Child, LNumBatchRequired_Child, aAcceptProbability, aAppContext, ARecursively);
            }));
        }
    }
    Task.WaitAll(LTasks.ToArray());
}
Could anyone share some thoughts? Thank you.
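Two things in the method above are worth checking. First, the first task reassigns the ANode parameter while the calling thread may simultaneously be enumerating ANode.GetOutEdges(), which is a data race; working on a local copy avoids it. Second, Task.WaitAll rethrows task failures as an AggregateException, so an unobserved exception can look like the walk "exiting early". A sketch of both fixes, keeping the structure above:

// Take a local copy so the task below cannot race with GetOutEdges().
IResource lRoot = ANode;
LTasks.Add(Task.Factory.StartNew(() =>
{
    var lNewContext = new AppContext(new DbContext(new Context(LUser)));
    var lNewRep = new ResourceRepository(lNewContext);
    var lFresh = lNewRep.Get(lRoot.Id); // do not write back into the shared parameter
    lFresh.ResourceInstance_Create();
    // ... set the ResourceInstance fields exactly as above ...
    lFresh.ResourceInstance.Save();
}));

// ... queue the per-edge tasks against lRoot.GetOutEdges() as before ...

try
{
    Task.WaitAll(LTasks.ToArray());
}
catch (AggregateException ex)
{
    // Surface the real reason the walk stops instead of failing silently.
    foreach (var inner in ex.Flatten().InnerExceptions)
        Console.Error.WriteLine(inner);
    throw;
}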

Effective pagination with Active Directory searches

What would be an effective way to paginate Active Directory searches in .NET? There are many ways to search in AD, but so far I couldn't find how to do it effectively. I want to be able to specify Skip and Take parameters and to retrieve the total number of records matching my search criteria in the result.
I have tried searching with the PrincipalSearcher class:
using (var ctx = new PrincipalContext(ContextType.Domain, "FABRIKAM", "DC=fabrikam,DC=com"))
using (var criteria = new UserPrincipal(ctx))
{
    criteria.SamAccountName = "*foo*";
    using (var searcher = new PrincipalSearcher(criteria))
    {
        ((DirectorySearcher)searcher.GetUnderlyingSearcher()).SizeLimit = 3;
        var results = searcher.FindAll();
        foreach (var found in results)
        {
            Console.WriteLine(found.Name);
        }
    }
}
Here I was able to limit the search results to 3, but I wasn't able to get the total number of records matching my search criteria (SamAccountName contains foo), nor was I able to tell the searcher to skip, for example, the first 50 records.
I also tried using System.DirectoryServices.DirectoryEntry and System.DirectoryServices.Protocols.SearchRequest, but the only thing I can do there is specify the page size.
So is the only way to fetch all the results on the client and do the Skip and Count there? I really hope there are more effective ways to achieve this directly on the domain controller.
You may try a virtual list view search. The following sorts the users by cn, then gets 51 users starting from the 100th one.
DirectoryEntry rootEntry = new DirectoryEntry("LDAP://domain.com/dc=domain,dc=com", "user", "pwd");
DirectorySearcher searcher = new DirectorySearcher(rootEntry);
searcher.SearchScope = SearchScope.Subtree;
searcher.Filter = "(&(objectCategory=person)(objectClass=user))";
searcher.Sort = new SortOption("cn", SortDirection.Ascending);
searcher.VirtualListView = new DirectoryVirtualListView(0, 50, 100);
foreach (SearchResult result in searcher.FindAll())
{
    Console.WriteLine(result.Path);
}
For your use case you only need the BeforeCount, AfterCount, and Offset properties of DirectoryVirtualListView (the three arguments in the DirectoryVirtualListView constructor). The documentation for DirectoryVirtualListView is very limited, so you may need to experiment with how it behaves.
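To map this onto Skip/Take plus a total: Offset plays the role of Skip (VLV offsets appear to be 1-based), AfterCount corresponds to Take - 1 (the target entry itself is included), and after FindAll() runs, the searcher's VirtualListView.ApproximateTotal holds the server's estimate of the total match count. A sketch under those assumptions; the exact off-by-one behavior is worth verifying by experiment, as noted above:

// Hypothetical Skip/Take helper over the VLV search shown above.
public static List<string> FindUsersPage(DirectoryEntry rootEntry, string filter,
                                         int skip, int take, out int approximateTotal)
{
    using (var searcher = new DirectorySearcher(rootEntry, filter))
    {
        searcher.SearchScope = SearchScope.Subtree;
        searcher.Sort = new SortOption("cn", SortDirection.Ascending); // VLV requires a sort
        searcher.VirtualListView = new DirectoryVirtualListView(0, take - 1, skip + 1);

        var page = new List<string>();
        foreach (SearchResult result in searcher.FindAll())
        {
            page.Add(result.Path);
        }

        // Filled in by the server once the search has executed.
        approximateTotal = searcher.VirtualListView.ApproximateTotal;
        return page;
    }
}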
If SizeLimit is set to zero and PageSize is set to 500, a search matching 12,000 results will return all of them in pages of 500 items, with the last page containing only 200 items. The paging occurs transparently, and the application does not have to perform any special processing other than setting the PageSize property to the proper value.
SizeLimit limits the number of results you can retrieve at once, so your PageSize needs to be less than or equal to 1000 (Active Directory limits the maximum number of search results to 1000; setting the SizeLimit property to a value greater than 1000 has no effect). The paging is done automagically behind the scenes when you call FindAll() etc.
For more details please refer to MSDN:
https://msdn.microsoft.com/en-us/library/ms180880.aspx
https://msdn.microsoft.com/en-us/library/system.directoryservices.directorysearcher.pagesize.aspx
https://msdn.microsoft.com/en-us/library/system.directoryservices.directorysearcher.sizelimit.aspx
Waaaay late to the party, but this is what I'm doing:
I'm using FindOne() instead of FindAll(), and member;range=<start>-<end> in PropertiesToLoad.
There's a catch with member;range: on the last page, even if you ask for member;range=1000-1999 (for instance), it returns member;range=1000-*, so you have to check for the * at the end to know whether there is more data.
public List<string> PagedSearch()
{
    var list = new List<string>();
    bool lastPage = false;
    int start = 0, end = 0, step = 1000;
    var rootEntry = new DirectoryEntry("LDAP://domain.com/dc=domain,dc=com", "user", "pwd");
    var filter = "(&(objectCategory=person)(objectClass=user)(samAccountName=*foo*))";
    using (var memberSearcher = new DirectorySearcher(rootEntry, filter, null, SearchScope.Base))
    {
        while (!lastPage)
        {
            start = end;
            end = start + step - 1;
            memberSearcher.PropertiesToLoad.Clear();
            memberSearcher.PropertiesToLoad.Add(string.Format("member;range={0}-{1}", start, end));
            var memberResult = memberSearcher.FindOne();
            var membersProperty = memberResult.Properties.PropertyNames.Cast<string>().FirstOrDefault(p => p.StartsWith("member;range="));
            if (membersProperty != null)
            {
                lastPage = membersProperty.EndsWith("-*");
                list.AddRange(memberResult.Properties[membersProperty].Cast<string>());
                end = list.Count;
            }
            else
            {
                lastPage = true;
            }
        }
    }
    return list;
}
private static DirectoryEntry forestlocal = new DirectoryEntry(LocalGCUri, LocalGCUsername, LocalGCPassword);
private DirectorySearcher localSearcher = new DirectorySearcher(forestlocal);

public List<string> GetAllUsers()
{
    List<string> users = new List<string>();
    localSearcher.SizeLimit = 10000;
    localSearcher.PageSize = 250;
    string localFilter = @"(&(objectClass=user)(objectCategory=person)(!(objectClass=contact))(msRTCSIP-PrimaryUserAddress=*))";
    localSearcher.Filter = localFilter;
    SearchResultCollection localForestResult;
    try
    {
        localForestResult = localSearcher.FindAll();
        if (localForestResult != null)
        {
            foreach (SearchResult result in localForestResult)
            {
                if (result.Properties.Contains("mail"))
                    users.Add((string)result.Properties["mail"][0]);
            }
        }
    }
    catch (Exception ex)
    {
    }
    return users;
}
