I want to count some filtered data in List of objects (TSource), while processing it in result list (TResult). My method looks like:
int ratedUsersCount = 0;
return sourceList.
.GroupBy(x => x.UserId)
.Select(u => {
var count = u.Count();
if (count >= 10) ratedUsersCount ++;
return new ... //some new object with calculations
}
)
.OrderByDescending(_ => some field)
.ThenByDescending(_ => some other field)
.Take(20)
.Select((u, i) => {
u.Index = i + 1;
u.Rating = i / ratedUsersCount ;
return u;
};
);
So, does the variable ratedUsersCount use incorrectly?
How can I fix the issue?
Try splitting your code up, it will become much cleaner, easier to understand, and won't produce side effects from within LINQ.
var groupedUsers = sourceUsers.GroupBy(x => x.UserId);
var minimumCount = 10;
var ratedUserCount = grouperUsers.Count(x => x.Take(minimumCount + 1).Count() >= minimumCount);
var intermediateUsers = groupedUsers.Select(x => new { /* ... */ });
var filteredUsers = intermediateUsers.OrderByDescending(/* ... */).ThenByDescending(/* ... */).Take(20);
var finalUsers = filteredUsers.Select(/* ... */);
Note that this may be a little slower, since grouperUsers.Count(x => x.Count() >= 10) will enumerate the IEnumerable in order to apply the counting function.
Related
I have a dataset that is built gradually in parts, and as each part is done, I'm associating the entries with their DENSE_RANK() with the following code (source: implement dense rank with linq):
aQueryable.GroupBy(x => x)
.Where(g => g.Any())
.OrderBy(g => g.Key.SortOrder1)
.ThenBy(g => g.Key.SortOrder2)
.ThenBy(g => g.Key.SortOrder3)
.Select((g, i) =>
{
++i;
foreach (var x in g)
{
x.DenseRank = i;
}
return g;
}).Select(g => g.Key)
SQL equivalent: DENSE_RANK() OVER ( ORDER BY SortOrder1, SortOrder2, SortOrder3 )
However, the DenseRank that I'm computing here doesn't match the DENSE_RANK() I get in SQL once the entire dataset is written. I suspect this is because I'm computing my DENSE_RANK() on a subset of the full dataset.
Is there any way I can compute the same DENSE_RANK() as SQL without waiting for my entire dataset to finish populating first?
If you wanted to stream the results through an IEnumerable, you can OrderBy on the database and then write your own select, checking for rank:
var lastSort1 = default(int);
var lastSort2 = default(int);
var lastSort3 = default(int);
var firstRun = true;
var rank = 1;
iqueryable
.OrderBy(i => i.SortOrder1)
.ThenBy(i => i.SortOrder2)
.ThenBy(i => i.SortOrder3)
.AsEnumerable()
.Select(i =>
{
if (!firstRun && (lastSort1 != i.SortOrder1 || lastSort2 != i.SortOrder2 || lastSort3 != i.SortOrder3))
{
rank++;
}
firstRun = false;
lastSort1 = i.SortOrder1;
lastSort2 = i.SortOrder2;
lastSort3 = i.SortOrder3;
i.DenseRank = rank;
return i;
});
As soon as the first data comes through, you'll start receiving items through the IEnumerable, but eventually it will all be materialized.
Im having 5 users and 5 services.
I must catch if more than one user(s) have same service. So I created a dictionary:
//JMBGS represents users ids
List<string> multipleJMBGs = new List<string>();
multipleJMBGs.Add(data.jMBG);
multipleJMBGs.Add(data.secondPersonJMBG);
multipleJMBGs.Add(data.thirdPersonJMBG);
multipleJMBGs.Add(data.fourthPersonJMBG);
multipleJMBGs.Add(data.fifthPersonJMBG);
multipleJMBGs.RemoveAll(x => string.IsNullOrEmpty(x));
//object Service has Id property, and I need to check if more than one user has the same service
var serviceForClient = data?.Schedules[0]?.Service;
var serviceForFirstFamilyMember = data?.Schedules[1]?.Service;
var serviceForSecondFamilyMember = data?.Schedules[2]?.Service;
var serviceForThirdFamilyMember = data?.Schedules[3]?.Service;
var serviceForFourthFamilyMember = data?.Schedules[4]?.Service;
Dictionary<string, EmployeeTableDay.ServiceDTO> userService = new Dictionary<string, EmployeeTableDay.ServiceDTO>();
userService.Add(data.jMBG, serviceForClient);
userService.Add(data.secondPersonJMBG, serviceForFirstFamilyMember);
userService.Add(data.thirdPersonJMBG, serviceForSecondFamilyMember);
userService.Add(data.fourthPersonJMBG, serviceForThirdFamilyMember);
userService.Add(data.fifthPersonJMBG, serviceForFourthFamilyMember);
What I want to have as flag (true/false):
different users can have different services
different users can have same services
same users, can not have same service
same users can have multiple services
UPDATE #1:
List<string> multipleJMBGs = new List<string>();
multipleJMBGs.Add(data.jMBG);
multipleJMBGs.Add(data.secondPersonJMBG);
multipleJMBGs.Add(data.thirdPersonJMBG);
multipleJMBGs.Add(data.fourthPersonJMBG);
multipleJMBGs.Add(data.fifthPersonJMBG);
multipleJMBGs.RemoveAll(x => string.IsNullOrEmpty(x));
var countedSchedules = data?.Schedules?.Count();
var serviceIdForClient = 0;
var serviceIdForFirstFamilyMember = 0;
var serviceIdForSecondFamilyMember = 0;
var serviceIdForThirdFamilyMember = 0;
var serviceIdForFourthFamilyMember = 0;
List<int> servicesIds = new List<int>();
if(countedSchedules >= 1 && data?.Schedules[0]?.Service != null)
{
serviceIdForClient = data.Schedules[0].Service.Id;
servicesIds.Add(serviceIdForClient);
}
if (countedSchedules >= 2 && data?.Schedules[1]?.Service != null)
{
serviceIdForFirstFamilyMember = data.Schedules[1].Service.Id;
servicesIds.Add(serviceIdForFirstFamilyMember);
}
if (countedSchedules >= 3 && data?.Schedules[2]?.Service != null)
{
serviceIdForSecondFamilyMember = data.Schedules[2].Service.Id;
servicesIds.Add(serviceIdForSecondFamilyMember);
}
if (countedSchedules >= 4 && data?.Schedules[3]?.Service != null)
{
serviceIdForThirdFamilyMember = data.Schedules[3].Service.Id;
servicesIds.Add(serviceIdForThirdFamilyMember);
}
if (countedSchedules == 5 && data?.Schedules[4]?.Service != null)
{
serviceIdForFourthFamilyMember = data.Schedules[4].Service.Id;
servicesIds.Add(serviceIdForFourthFamilyMember);
}
servicesIds = servicesIds.Where(x => x > 0).ToList();
//because dictionary throws an exception
for (int i = 0; i < countedSchedules; i++)
{
multipleJMBGs[i] = i + " _ " + multipleJMBGs[i];
}
//create dictionary from two lists
var dic = multipleJMBGs.Zip(servicesIds, (k, v) => new { k, v }).ToDictionary(x => x.k, x => x.v);
var duplicateServices = dic.GroupBy(x => x.Value).Where(x => x.Count() > 1)
.Select(x => new { ServiceId = x.Key, Users = x.ToList() });
var duplicateUsers = dic.GroupBy(x => x.Key.Substring(3,x.Key.Length-3)).Where(x => x.Count() > 1)
.Select(x => new { User = x.Key, Services = x.ToList() });
if(duplicateServices > 1 && duplicateUsers.Count > 1)
{
//show a message that it ca't be proceeed further
}
else
{
//continue
}
You can group the services by Id to get information on duplicates:
var groups = data?.Schedules
.Select((service, index) => (service, index))
.GroupBy(x => x.service.Id);
foreach (var g in groups) {
if (g.Count() == 1) {
Console.WriteLine(
$"Service Id {g.Key} exists once for user index {g.First().index}");
} else {
string indexes = String.Join(", ", g.Select(t => t.index.ToString()));
Console.WriteLine(
$"Service Id {g.Key} is shared by user indexes {indexes}");
}
}
If you don't call multipleJMBGs.RemoveAll(...), then the indexes in the users list will match the indexes of the services. So you can easily get the JMBG assigned to a service.
var groups = data?.Schedules
.Select((service, index) => (service, index))
.GroupBy(x => x.service.Id);
foreach (var g in groups) {
if (g.Count() == 1) { // Service is unique
var service = g.First().service;
int index = g.First().index;
string user = multipleJMBGs[index];
...
} else { // Services assigned to more than one user.
var serviceId = g.Key;
var service = g.First().service;
foreach (var t in g) {
int index = t.index;
string user = multipleJMBGs[index];
...
}
...
}
}
Purely answering the question on how to check for duplicates within your dictionary:
You could group the values of your dictionary and look for duplicates that way.
var containsDuplicates = userService.GroupBy(x => x.Value).Any(x => x.Count() > 1);
If you want to actually see which services are used multiple times by which users, create the following grouping:
var duplicateServices = userService.GroupBy(x => x.Value).Where(x => x.Count() > 1)
.Select(x => new { Service = x.Key, Users = x.ToList() });
UPDATE #1:
You don't necessarily have to group up both of these lists if you just need to check if any of the two contain duplicates.
We could simply compare the total count of the lists with the count of the list when we remove all possible duplicates:
var duplicateServices = countedSchedules != data?.Schedules?.Distinct().Count();
var duplicateUsers = multipleJMBGs.Count() != multipleJMBGs.Distinct().Count();
UPDATE #2:
Sounds like you want to join the two lists together and not necessarily convert it to a dictionary, what we want to do instead is create a new List so we can group that on the combination of the two properties.
var containsDuplicates = multipleJMBGs.Zip(serviceIds, (JMBG, serviceId) => new { JMBG, serviceId })
.Where(x => !string.IsNullOrEmpty(x.JMBG) && x.serviceId > 0) // Filter AFTER zipping the two lists together
.GroupBy(x => x).Any(x => x.Count() > 1);
Do be aware that you are removing elements in both lists, meaning that the order might not be correct. You could instead add a Where clause after you zip the lists like in the example above.
I have several conditions that might affect what filters (.Where(...)) are used on a list. And at some point an exception is thrown, and I would like to know what actions have been called upon the list up until this point.
Is something like this possible?
var myList = new List<SomeClass>();
myList = myList.Where(item => item.property == value);
.
.
.
myList = myList.Where(item => item.otherProperty < otherValue);
Console.WriteLine(myList.ToActionsString());
It might print something like this:
list.Where(i => i.property == <the actual value>)
.Where(i => i.otherProperty < <the actual otherValue>)
Just calling toString() on the list does not exactly give any relevant information, and just listing the items in the list is not of interest.
Warning: there is overhead to this as the compiler has to create all of the Expression objects (which then have to be allocated and compiled at runtime). Use this sparingly.
You can do this using AsQueryable and relying on the ToString logic of the built in EnumerableQuery and Expression classes. The following extension method will convert your query to it's textual representation:
public static string GetText<T>(this IQueryable<T> query) {
retury query.Expression.ToString();
}
It can be used like:
var list = new List<int>();
var query = list.AsQueryable()
.Select((c, i) => c * (i + 1))
.Where(c => c > 5)
.Where(c => c < 10 && c != 7)
.Take(2)
.OrderBy(x => 1);
var text = query.GetText();
This results in the following:
System.Collections.Generic.List`1[System.Int32].Select((c, i) => (c * (i + 1))).Where(c => (c > 5)).Where(c => ((c < 10) AndAlso (c != 7))).Take(2).OrderBy(x => 1)
We can throw an anonymous type in the mix just to see how it looks:
var query = list.AsQueryable()
.Select((c, i) => c * (i + 1))
.Select(x => new { Value = x, ValueSquared = x * x });
var result = query.GetText();
Which will print:
System.Collections.Generic.List`1[System.Int32].Select((c, i) => (c * (i + 1))).Select(x => new <>f__AnonymousType0`2(Value = x, ValueSquared = (x * x)))
Through the use of Expression manipulation, we can make this method a little bit more robust. We can add in line breaks between the method calls and optionally strip off the name of the list's type.
public static string GetText<T>(this IQueryable<T> query, bool lineBreaks, bool noClassName)
{
var text = query.Expression.ToString();
if (!lineBreaks && !noClassName)
return text;
var expression = StripQuotes(query.Expression);
if (!(expression is MethodCallExpression mce))
return text;
if (lineBreaks)
{
var strings = new Stack<string>();
strings.Push(mce.ToString());
while (mce.Arguments.Count > 0 && mce.Arguments[0] is MethodCallExpression me)
{
strings.Push(me.ToString());
mce = me;
}
var sb = new StringBuilder(strings.Pop());
var len = sb.Length;
while (strings.TryPop(out var item))
{
sb.AppendLine().Append(item.Substring(len));
len = item.Length;
}
text = sb.ToString();
}
if (mce.Arguments.Count > 0 && mce.Arguments[0] is ConstantExpression ce)
{
var root = ce.Value.ToString();
if (root != null && text.StartsWith(root))
{
text = noClassName
? text.Substring(root.Length + 1)
: text.Insert(root.Length, Environment.NewLine);
}
}
return text;
}
// helper in case we get an actual Queryable in there
private static Expression StripQuotes(Expression e)
{
while (e.NodeType == ExpressionType.Quote)
e = ((UnaryExpression)e).Operand;
return e;
}
We can call this method as follows:
var list = new List<int>();
var query = list.AsQueryable()
.Select((c, i) => c * (i + 1))
.Where(c => c > 5)
.Where(c => c < 10 && c != 7)
.Take(2)
.OrderBy(x => 1);
var text = query.GetText(true, true);
Which will produce the following:
Select((c, i) => (c * (i + 1)))
.Where(c => (c > 5))
.Where(c => ((c < 10) AndAlso (c != 7)))
.Take(2)
.OrderBy(x => 1)
Note that this is very basic. It's not going to cover the case of closures (passing variables in) you'll get the <>DisplayClass objects written into your query. We can resolve that with an ExpressionVisitor that walks the expression and evaluates the ConstantExpressions representing the closures.
(Unfortunately I do not have time at the moment to provide that ExpressionVisitor solution, but stay tuned for an update)
No, it is not possible, at least not in a direct way as described in your question.
The List is not filtered step by step (i.e., apply Where(expr1) for all elements, then Where(expr2) for all remaining elements, ...), but in a deferred way:
If you request the first item of the resulting IEnumerable, LINQ evaluates the expr1 clause for each item until one item matches.
Then it checks whether this list item also matches expr2.
If it does, return it (or pass to further Where stages).
If it doesn't match, go back to step 1 and continue finding an item which matches expr1.
So logging by simply calling some ToActionsString() is difficult here. As already noted in the comments, it is probably much easier to just log when adding the Where-clauses, since you are in a known state then anyway:
if(condition1)
{
myList = myList.Where(item => item.Property == value);
Log($"Adding expression 1 with value '{value}'");
}
If your concern is that value could change before the IEnumerable is actually evaluated (captured variable), and you cannot restructure your control flow adequately, a workaround may be to create Func<T> objects which output the captured variables, and to evaluate those immediately before iterating the list:
List<Func<int>> values = new List<Func<int>>();
if(condition1)
{
myList = myList.Where(item => item.Property == value);
values.Add(() => value);
}
...
foreach(var v in values)
Log($"List will be filtered by {v()}");
var filteredList = myList.ToList();
Finally, you could call some logging function in your expressions, which logs the conditions and/or catches exceptions when evaluating those conditions:
myList.Where(item =>
{
Log(item, value);
return item.Property == value;
});
Is there a way to preserve the order after this linq expression?
var results =
DateList
.GroupBy(x => x.Date.Subtract(firstDay).Days / 7 + 1)
.SelectMany(gx => gx, (gx, x) => new {Week = gx.Key,DateTime =x,Count = gx.Count(),});
I found this Preserving order with LINQ , but I'm not sure if its the GroupBy or SelectMany casing the issues
Yes, if you first select your DateList and combine it with an index, using an overload of .Select that uses a delegate with a second (int) parameter that is called with the index of the items from the sequence :
DateList
.Select((dateTime, idx) => new {dateTime, idx})
.GroupBy(x => x.dateTime.Date.Subtract(firstDay).Days / 7 + 1)
...and persist the value through the linq chain
.SelectMany(gx => gx, (gx, x) => new {Week = gx.Key,
DateTime = x.dateTime,
Count = gx.Count(),
x.idx})
...then use it to re-order the output
.OrderBy(x => x.idx)
...and strip it from your final selection
.Select(x => new {x.Week, x.DateTime, x.Count});
then you can maintain the same order as the original list.
Solution of #spender is good, but can it be done without OrderBy? It can, because we can use the index for direct indexing into array, but it would not be one linq query:
var resultsTmp =
DateList.Select((d, i) => new { d, i })
.GroupBy(x => x.d.Date.Subtract(firstDay).Days / 7 + 1)
.SelectMany(gx => gx, (gx, x) => new { Week = gx.Key, DateTime = x.d, Count = gx.Count(), x.i })
.ToArray();
var resultsTmp2 = resultsTmp.ToArray();
foreach (var r in resultsTmp) { resultsTmp2[r.i] = r; };
var results = resultsTmp2.Select(r => new { r.Week, r.DateTime, r.Count });
It looks a bit complex. I would probably do something more straightforward like:
var DateList2 = DateList.Select(d => new { DateTime = d, Week = d.Subtract(firstDay).Days / 7 + 1 }).ToArray();
var weeks = DateList2.GroupBy(d => d.Week).ToDictionary(k => k.Key, v => v.Count());
var results = DateList2.Select(d2 => new { d2.Week, d2.DateTime, Count = weeks[d2.Week] });
I have an array of 2000 strings. The strings are: "art", "economy", "sport" and "politic". I want to group each 500 elements and get their counts
Could anyone help please?
Another solution:
var count = 0;
var dictionaries =
strings.GroupBy(s => count++ / 500)
.Select(g => g.Distinct().ToDictionary(k => k, k => g.Count(s => s == k)))
.ToList();
This will create a List<Dictionary<string, int>>. Each dictionary represents a tally of 500 elements (or possibly less for the last dictionary), where the keys are strings and the values are the number of occurrences of the string among the 500 elements the dictionary represents.
There is no requirement to hardcode all the possible values that may be encountered.
For the maximum possible performance you can also use this version:
var count = 0;
var dictionaries =
strings.GroupBy(s => count++ / 500)
.Select(g => g.Aggregate(
new Dictionary<string, int>(),
(d, w) => { d[w] = (d.ContainsKey(w) ? d[w] + 1 : 1); return d; })
)
.ToList();
This version iterates over each element in your source array exactly once. The output is in the same format as the first version.
var result = strings.Select((s, i) => new { s, i })
.GroupBy(x => x.i / 500)
.Select(x => x.GroupBy(y => y.s)
.Select(z => new {
Name=z.Key,
Count=z.Count()
}).ToList())
.ToList();
Try
var grouping = Enumerable.Range(0,2000)
.Select(i => i / 500)
.Zip(Strings, (i,s) => new { Group = i, Str = s})
.GroupBy(anon => anon.Group,
anon => anon.Str,
(key,g) => new
{
Key = key,
Art = g.Count(str => str == "art"),
Economy = g.Count(str => str == "economy"),
Politic = g.Count(str => str == "politic"),
Sport= g.Count(str => str == "sport")
});
foreach(anon in grouping)
{
//textbox logic OP will have to change to suit
TextBox1.WriteLine(String.Format("Group: {0}", anon.Key));
TextBox1.WriteLine(String.Format("Art: {0}",anon.Art));
TextBox1.WriteLine(String.Format("Economy: {0}",anon.Economy ));
TextBox1.WriteLine(String.Format("Politic: {0}",anon.Politic ));
TextBox1.WriteLine(String.Format("Sport: {0}",anon.Sport));
}
Alternatively (as per Snowbear)
var grouping = Strings.Select((s,i) => new { Group = i / 500, Str = s})
.GroupBy(anon => anon.Group,
anon => anon.Str,
(key,g) => new
{
Key = key,
Art = g.Count(str => str == "art"),
Economy = g.Count(str => str == "economy"),
Politic = g.Count(str => str == "politic"),
Sport= g.Count(str => str == "sport")
});
foreach(anon in grouping)
{
//textbox logic OP will have to change to suit
TextBox1.WriteLine(String.Format("Group: {0}",anon.Key + 1));
TextBox1.WriteLine(String.Format("Art: {0}",anon.Art));
TextBox1.WriteLine(String.Format("Economy: {0}",anon.Economy ));
TextBox1.WriteLine(String.Format("Politic: {0}",anon.Politic ));
TextBox1.WriteLine(String.Format("Sport: {0}",anon.Sport));
}
int CountElementsInGroup = 500;
//from 500 to 1000
int NumberGroup = 2;
string[] GroupTypes = new string[4] { "art", "economy", "sport", "politic" };
//Fill example array
string[] arr = new string[2000];
Random rand = new Random();
for (int i = 0; i < arr.Length;i++ )
arr[i] = GroupTypes[rand.Next(0, 3)];
var res = (from p in arr.Skip((NumberGroup - 1) * CountElementsInGroup).Take(CountElementsInGroup)
group p by p into g
select new GroupCountClass { GroupName = g.Key, GroupCount = g.Count() });
textBox1.Text = "";
foreach (GroupCountClass c in res)
{
textBox1.Text += String.Format("GroupName:{0} Count:{1};",c.GroupName,c.GroupCount);
}