LINQ Distinct not using the IEqualityComparer? [duplicate] - c#

This question already has answers here:
IEqualityComparer not working as intended
(3 answers)
Closed 7 years ago.
I have the following model:
/// <summary>
/// A word together with the derived forms that the comparers key on.
/// </summary>
public class Word
{
    /// <summary>The word as it was originally supplied.</summary>
    public string Original { get; set; }

    /// <summary>The normalized form of <see cref="Original"/>.</summary>
    public string Normalized { get; set; }

    /// <summary>The root derived from <see cref="Normalized"/>.</summary>
    public string Root { get; set; }

    /// <summary>The sub-root derived from <see cref="Root"/>.</summary>
    public string Subroot { get; set; }

    /// <summary>Regular expression associated with <see cref="Subroot"/>.</summary>
    public Regex SubrootRegex { get; set; }
}
I have created the following three IEqualityComparer<Word>:
// Equality comparer that treats two Word instances as equal when their Normalized strings match.
public class NormalizedWordComparer : IEqualityComparer<Word> {
// Compares only the Normalized property; x and y are dereferenced without a null check.
public bool Equals(Word x, Word y) {
return x.Normalized == y.Normalized;
}
// NOTE(review): this returns the hash of the Word object itself rather than the hash of
// Normalized, so the hash is not derived from the value that Equals compares.
public int GetHashCode(Word obj) {
return obj.GetHashCode();
}
}
// Equality comparer that treats two Word instances as equal when their Root strings match.
public class RootWordComparer : IEqualityComparer<Word> {
// Compares only the Root property; x and y are dereferenced without a null check.
public bool Equals(Word x, Word y) {
return x.Root == y.Root;
}
// NOTE(review): this returns the hash of the Word object itself rather than the hash of
// Root, so the hash is not derived from the value that Equals compares.
public int GetHashCode(Word obj) {
return obj.GetHashCode();
}
}
// Equality comparer that treats two Word instances as equal when their Subroot strings match.
public class SubrootWordComparer : IEqualityComparer<Word> {
// Compares only the Subroot property; x and y are dereferenced without a null check.
public bool Equals(Word x, Word y) {
return x.Subroot == y.Subroot;
}
// NOTE(review): this returns the hash of the Word object itself rather than the hash of
// Subroot, so the hash is not derived from the value that Equals compares.
public int GetHashCode(Word obj) {
return obj.GetHashCode();
}
}
In another class, I am trying to do the following:
// Project every configured word into a Word carrying its derived forms,
// then de-duplicate the result by the Normalized property.
_normalizedWords = ConfigurationFacade.Words.Select(w => {
Word word = new Word() { Original = w };
word.Normalized = Normalize(word, _filters);
// Root is derived from Normalized, and Subroot is derived from Root.
word.Root = GetRoot(word.Normalized, ConfigurationFacade.WordRootPercentage);
word.Subroot = GetRoot(word.Root, ConfigurationFacade.WordSubrootPercentage);
word.SubrootRegex = null; //Complicated regex here
return word;
}).Distinct(new NormalizedWordComparer());
// Each following query is built on top of the previous one, narrowing by Root and then by Subroot.
// NOTE(review): if these are LINQ-to-Objects queries they are evaluated lazily, so the
// Select above would re-run on every enumeration - TODO confirm and materialize if so.
_wordRoots = _normalizedWords.Distinct(new RootWordComparer());
_wordSubroots = _wordRoots.Distinct(new SubrootWordComparer());
However, _normalizedWords, _wordRoots and _wordSubroots all end up with the same amount of elements, as if the Distinct() method didn't work or the comparer is being ignored.
I checked the elements with the debugger, and there are a lot that have the same Root value, so there should only be one of them in _wordRoots, but that's not the case, they are not removed or filtered.
Why is my Distinct() not working?

Why is my Distinct() not working?
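Because Distinct first checks the hash code (it is a quick check to see whether two objects could be equal) and only then calls Equals. Your GetHashCode implementations all return the hash of the Word object itself, which does not correspond to the property compared in your Equals methods, so two words with the same Normalized, Root or Subroot value still get different hash codes and Equals is never called for them. That is why Distinct is not working as you would expect.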
Change your GetHashCode methods to correspond with Equals:
/// <summary>
/// Treats two <see cref="Word"/> instances as equal when their
/// <c>Normalized</c> strings are equal (ordinal comparison).
/// </summary>
public class NormalizedWordComparer : IEqualityComparer<Word>
{
    /// <summary>
    /// Returns true when both arguments are the same reference (including both null)
    /// or when both are non-null and have equal <c>Normalized</c> values.
    /// </summary>
    public bool Equals(Word x, Word y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return string.Equals(x.Normalized, y.Normalized);
    }

    /// <summary>
    /// Hashes the same value that <see cref="Equals(Word, Word)"/> compares.
    /// A null word or a null <c>Normalized</c> hashes to 0 instead of throwing.
    /// </summary>
    public int GetHashCode(Word obj)
    {
        if (ReferenceEquals(obj, null) || obj.Normalized == null)
        {
            return 0;
        }

        return obj.Normalized.GetHashCode();
    }
}
/// <summary>
/// Treats two <see cref="Word"/> instances as equal when their
/// <c>Root</c> strings are equal (ordinal comparison).
/// </summary>
public class RootWordComparer : IEqualityComparer<Word>
{
    /// <summary>
    /// Returns true when both arguments are the same reference (including both null)
    /// or when both are non-null and have equal <c>Root</c> values.
    /// </summary>
    public bool Equals(Word x, Word y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return string.Equals(x.Root, y.Root);
    }

    /// <summary>
    /// Hashes the same value that <see cref="Equals(Word, Word)"/> compares.
    /// A null word or a null <c>Root</c> hashes to 0 instead of throwing.
    /// </summary>
    public int GetHashCode(Word obj)
    {
        if (ReferenceEquals(obj, null) || obj.Root == null)
        {
            return 0;
        }

        return obj.Root.GetHashCode();
    }
}
/// <summary>
/// Treats two <see cref="Word"/> instances as equal when their
/// <c>Subroot</c> strings are equal (ordinal comparison).
/// </summary>
public class SubrootWordComparer : IEqualityComparer<Word>
{
    /// <summary>
    /// Returns true when both arguments are the same reference (including both null)
    /// or when both are non-null and have equal <c>Subroot</c> values.
    /// </summary>
    public bool Equals(Word x, Word y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return string.Equals(x.Subroot, y.Subroot);
    }

    /// <summary>
    /// Hashes the same value that <see cref="Equals(Word, Word)"/> compares.
    /// A null word or a null <c>Subroot</c> hashes to 0 instead of throwing.
    /// </summary>
    public int GetHashCode(Word obj)
    {
        if (ReferenceEquals(obj, null) || obj.Subroot == null)
        {
            return 0;
        }

        return obj.Subroot.GetHashCode();
    }
}

Related

How to .GroupBy() by Id and by list property?

I have these classes:
/// <summary>
/// One evaluation of an alert, together with the tags that were evaluated.
/// </summary>
public class AlertEvaluation
{
    /// <summary>Identifier of the alert this evaluation belongs to.</summary>
    public string AlertId { get; set; }

    /// <summary>The tags evaluated for this alert.</summary>
    public ICollection<EvaluatedTag> EvaluatedTags { get; set; }

    /// <summary>Identifier of the associated transaction.</summary>
    public string TransactionId { get; set; }

    /// <summary>Status of this evaluation.</summary>
    public EvaluationStatus EvaluationStatus { get; set; }

    /// <summary>Date of this evaluation.</summary>
    public DateTime EvaluationDate { get; set; }
}
/// <summary>
/// A tag attached to an alert evaluation, identified by an id and a name.
/// </summary>
public class EvaluatedTag
{
    /// <summary>Identifier of the tag.</summary>
    public string Id { get; set; }

    /// <summary>Name of the tag.</summary>
    public string Name { get; set; }
}
And I would like to get a list of alert evaluations grouped by AlertId, and by EvaluatedTags, meaning that I would like to compare and group evaluations that not only have the same AlertId, but to also have the same list of EvaluatedTags. (And also get the last evaluation in time)
I tried this:
// Groups by an anonymous (AlertSettingId, EvaluatedLabels) key, then keeps the evaluation
// with the latest EvaluationDate from each group.
// NOTE(review): the property names used in the key differ from the AlertId / EvaluatedTags
// properties declared on AlertEvaluation - confirm which names are correct.
var evaluationsGroupedAndOrdered = evaluations.GroupBy(x => new { x.AlertSettingId, x.EvaluatedLabels })
.Select(x => x.OrderByDescending(z => z.EvaluationDate ).FirstOrDefault()).ToList();
But of course, the comparing of list properties like that did not work.
I read something about adding an equality comparer in GroupBy, which would mean comparing the lists inside the objects right? But I'm not sure of how to implement it in the right way.
I tried (based on GroupBy on complex object (e.g. List<T>)) :
// Equality comparer for AlertEvaluation based on AlertId plus the set of EvaluatedTags.
public class AlertEvaluationComparer : IEqualityComparer<AlertEvaluation>
{
// Equal when the AlertIds match and the tags, sorted by Name, form equal sequences.
// SequenceEqual is called without a comparer, so the tags are compared with their default equality.
public bool Equals(AlertEvaluation x, AlertEvaluation y)
{
return x.AlertId == y.AlertId && x.EvaluatedTags.OrderBy(val => val.Name).SequenceEqual(y.EvaluatedTags.OrderBy(val => val.Name));
}
// XORs the AlertId hash with the hash of every tag.
// NOTE(review): the parameter type is AlertSettingEvaluation, not AlertEvaluation, so this
// does not match IEqualityComparer<AlertEvaluation>.GetHashCode - confirm the intended type.
public int GetHashCode(AlertSettingEvaluation x)
{
return x.AlertId.GetHashCode() ^ x.EvaluatedTags.Aggregate(0, (a, y) => a ^ y.GetHashCode());
}
}
But did not work either.. Maybe because my list EvaluatedTags is not a list of strings but of individual objects.
Does anybody have a nice solution for this?
A typical way to compare two lists is to use the System.Linq extension method SequenceEqual. This method returns true if both lists contain the same items, in the same order.
In order to make this work with an IEnumerable<EvaluatedTag>, we need to have a way to compare instances of the EvaluatedTag class for equality (determining if two items are the same) and for sorting (since the lists need to have their items in the same order).
To do this, we can override Equals and GetHashCode and implement IComparable<EvaluatedTag> (and might as well do IEquatable<EvaluatedTag> for completeness):
/// <summary>
/// A tag with value semantics: two tags are equal when both <see cref="Id"/> and
/// <see cref="Name"/> are equal, and tags sort by Id and then by Name (ordinal).
/// </summary>
public class EvaluatedTag : IEquatable<EvaluatedTag>, IComparable<EvaluatedTag>
{
    public string Id { get; set; }

    public string Name { get; set; }

    /// <summary>
    /// Orders by <see cref="Id"/>, then by <see cref="Name"/>, using ordinal comparison.
    /// </summary>
    /// <returns>
    /// A positive value when <paramref name="other"/> is null (any instance sorts after null,
    /// as the IComparable contract prescribes); otherwise the ordinal comparison result.
    /// </returns>
    public int CompareTo(EvaluatedTag other)
    {
        if (ReferenceEquals(other, null))
        {
            return 1;
        }

        int byId = string.CompareOrdinal(Id, other.Id);
        return byId != 0 ? byId : string.CompareOrdinal(Name, other.Name);
    }

    /// <summary>True when <paramref name="other"/> is non-null and has the same Id and Name.</summary>
    public bool Equals(EvaluatedTag other)
    {
        return !ReferenceEquals(other, null) &&
               string.Equals(other.Id, Id) &&
               string.Equals(other.Name, Name);
    }

    public override bool Equals(object obj)
    {
        return Equals(obj as EvaluatedTag);
    }

    /// <summary>
    /// Combines the hashes of Id and Name. Null values contribute 0, so the method never
    /// throws for a tag that <see cref="Equals(EvaluatedTag)"/> is able to compare.
    /// </summary>
    public override int GetHashCode()
    {
        // unchecked: wrap-around is expected when mixing hashes, even under /checked.
        unchecked
        {
            int hash = 17;
            hash = hash * 31 + (Id == null ? 0 : Id.GetHashCode());
            hash = hash * 31 + (Name == null ? 0 : Name.GetHashCode());
            return hash;
        }
    }
}
Now we can use this in the custom comparer you have in your question, for sorting and comparing the EvaluatedTags:
/// <summary>
/// Compares <see cref="AlertEvaluation"/> instances by AlertId and by the contents of
/// EvaluatedTags, ignoring the order in which the tags are stored.
/// </summary>
public class AlertEvaluationComparer : IEqualityComparer<AlertEvaluation>
{
    /// <summary>
    /// Returns true if the AlertIds are equal and the EvaluatedTags contain the same items.
    /// Both tag collections are sorted before SequenceEqual so that storage order is irrelevant.
    /// </summary>
    public bool Equals(AlertEvaluation x, AlertEvaluation y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (x == null || y == null)
        {
            return false;
        }

        if (!string.Equals(x.AlertId, y.AlertId))
        {
            return false;
        }

        if (x.EvaluatedTags == null)
        {
            return y.EvaluatedTags == null;
        }

        if (y.EvaluatedTags == null)
        {
            return false;
        }

        return x.EvaluatedTags.OrderBy(et => et)
            .SequenceEqual(y.EvaluatedTags.OrderBy(et => et));
    }

    /// <summary>
    /// Uses the same properties as <see cref="Equals(AlertEvaluation, AlertEvaluation)"/>:
    /// the AlertId hash combined with an order-independent sum of the tag hashes.
    /// </summary>
    public int GetHashCode(AlertEvaluation obj)
    {
        if (obj == null)
        {
            return 0;
        }

        // unchecked: Enumerable.Sum would throw OverflowException when the tag hashes
        // overflow Int32, so the sum is accumulated manually with wrap-around.
        unchecked
        {
            int tagsHash = 0;
            if (obj.EvaluatedTags != null)
            {
                foreach (var tag in obj.EvaluatedTags)
                {
                    // Plain addition keeps the result independent of element order,
                    // matching the sorted comparison in Equals.
                    tagsHash += tag == null ? 0 : tag.GetHashCode();
                }
            }

            // Explicit parentheses: '??' binds more loosely than '*' and '+'.
            int alertHash = obj.AlertId == null ? 0 : obj.AlertId.GetHashCode();
            return alertHash * 17 + tagsHash;
        }
    }
}
And finally we can pass your AlertEvaluationComparer to the GroupBy method to group our items:
// Group evaluations that the custom comparer considers equal (each evaluation is its own key),
// then order the resulting groups by the EvaluationDate of the group key.
var evaluationsGroupedAndOrdered = evaluations
    .GroupBy(evaluation => evaluation, new AlertEvaluationComparer())
    .OrderBy(g => g.Key.EvaluationDate)
    .ToList();
Here's a go at it, getting away from Linq a bit to make it easier to build the groups one at a time while leveraging sorting:
// Build groups keyed by AlertId plus the hash code of the EvaluatedTags collection.
// NOTE: the key contains a hash, so two different tag collections whose hashes collide
// would end up in the same group.
var groupMap = new Dictionary<string, SortedSet<AlertEvaluation>>();
foreach (var item in evals)
{
    // The '|' separator keeps the two key parts apart; without it "A-" + 1 and "A" + -1
    // would both produce the key "A-1".
    var combinedKey = item.AlertId + "|" + EvaluatedTag.GetCollectionHashCode(item.EvaluatedTags);
    if (!groupMap.TryGetValue(combinedKey, out SortedSet<AlertEvaluation>? groupItems))
    {
        // First item with this key: create its group.
        groupItems = new SortedSet<AlertEvaluation>();
        groupMap.Add(combinedKey, groupItems);
    }

    // SortedSet ignores an item whose CompareTo result is 0 against an existing member.
    groupItems.Add(item);
}

// Each group is a SortedSet ordered by AlertEvaluation.CompareTo; the list of groups
// itself is in dictionary enumeration order.
List<SortedSet<AlertEvaluation>>? groups = groupMap.Values.ToList();
This assumes that the classes implement IComparable and Equals/GetHashCode to facilitate sorting:
/// <summary>
/// An alert evaluation that sorts chronologically by <see cref="EvaluationDate"/>.
/// </summary>
public class AlertEvaluation : IComparable<AlertEvaluation>
{
    public string AlertId { get; set; }

    public ICollection<EvaluatedTag> EvaluatedTags { get; set; }

    public string TransactionId { get; set; }

    public EvaluationStatus EvaluationStatus { get; set; }

    public DateTime EvaluationDate { get; set; }

    /// <summary>
    /// Used by SortedSet. Orders by EvaluationDate; ties are broken by TransactionId and then
    /// AlertId (ordinal) so that two different evaluations with the same date do not compare
    /// as equal, which would make a SortedSet keep only one of them.
    /// </summary>
    /// <returns>A positive value when <paramref name="other"/> is null.</returns>
    public int CompareTo(AlertEvaluation? other)
    {
        if (other is null)
        {
            return 1;
        }

        int result = EvaluationDate.CompareTo(other.EvaluationDate);
        if (result != 0)
        {
            return result;
        }

        result = string.CompareOrdinal(TransactionId, other.TransactionId);
        if (result != 0)
        {
            return result;
        }

        return string.CompareOrdinal(AlertId, other.AlertId);
    }
}
/// <summary>
/// A tag with value equality on <see cref="Id"/> and <see cref="Name"/>, plus a helper
/// that hashes a whole collection of tags independently of their order.
/// </summary>
public class EvaluatedTag : IEquatable<EvaluatedTag?>
{
    public string Id { get; set; }

    public string Name { get; set; }

    public bool Equals(EvaluatedTag? other) => other != null && Id == other.Id && Name == other.Name;

    // Keeps object.Equals consistent with the overridden GetHashCode and the typed Equals.
    public override bool Equals(object? obj) => Equals(obj as EvaluatedTag);

    public override int GetHashCode() => HashCode.Combine(Id, Name);

    /// <summary>
    /// Computes a hash over all tags in <paramref name="items"/>. The tags are put into a
    /// fixed order (Id, then Name, both ordinal) first, so the result does not depend on the
    /// order of the collection or on the current culture.
    /// </summary>
    /// <param name="items">The tags to hash; a null collection is hashed like an empty one.</param>
    public static int GetCollectionHashCode(ICollection<EvaluatedTag> items)
    {
        var code = new HashCode();
        if (items is null)
        {
            return code.ToHashCode();
        }

        var ordered = items
            .OrderBy(i => i?.Id, StringComparer.Ordinal)
            .ThenBy(i => i?.Name, StringComparer.Ordinal);
        foreach (var item in ordered)
        {
            code.Add(item);
        }

        return code.ToHashCode();
    }
}
By the way, I'm using the fancy new HashCode class in .NET Core to override hash codes.

How do you write a GetHashCode method for an object made of a string and a collection of int32?

There is a class of Products:
/// <summary>
/// A product identified by its name and its collection of features.
/// </summary>
public class ProductWithFeatures
{
    /// <summary>Name of the product.</summary>
    public string Name { get; set; }

    /// <summary>Features that belong to the product.</summary>
    public ICollection<Feature> Features { get; set; }
}
/// <summary>
/// A product feature, identified solely by its integer id.
/// </summary>
public class Feature
{
    /// <summary>Creates a feature with the given id.</summary>
    /// <param name="Id">Value stored in the <see cref="Id"/> property.</param>
    public Feature(int Id)
    {
        this.Id = Id;
    }

    /// <summary>Identifier of the feature.</summary>
    public int Id { get; set; }
}
I want to write an IEqualityComparer for this (i already have one for Feature).
The one for Feature is like this:
/// <summary>
/// Compares <see cref="Feature"/> instances by their Id.
/// </summary>
public class FeatureComparer : IEqualityComparer<Feature>
{
    /// <summary>
    /// Returns true when both arguments are the same reference (including both null)
    /// or when both are non-null and have the same Id.
    /// </summary>
    public bool Equals(Feature x, Feature y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        return x.Id == y.Id;
    }

    /// <summary>Returns the Id as the hash code; a null feature hashes to 0.</summary>
    public int GetHashCode(Feature obj)
    {
        return ReferenceEquals(obj, null) ? 0 : obj.Id;
    }
}
And what i wrote so far on the other one is this:
// Work-in-progress comparer for ProductWithFeatures: Equals is drafted, GetHashCode is still empty.
public class ProductComparer : IEqualityComparer<LinqHomework.ProductWithFeatures>
{
// Compares the names and then the two Features collections.
// NOTE(review): FeatureComparer.Equals is invoked through the type name with two collections;
// this presumably binds to the static object.Equals(object, object) rather than to the
// instance method that compares two Feature objects - TODO confirm.
public bool Equals(ProductWithFeatures x, ProductWithFeatures y)
{
return x.Name == y.Name && LinqHomework.FeatureComparer.Equals(x.Features, y.Features);
}
// Not implemented yet: the body is empty, so no value is returned.
public int GetHashCode(ProductWithFeatures obj)
{
}
}
I can't find an answer anywhere about this. Does anybody know how to write it?
Two ProductWithFeaturess are equal if they have the same name, and have the same features in the same order.
/// <summary>
/// Compares products by name and by their features, in order.
/// </summary>
public class ProductComparer : IEqualityComparer<LinqHomework.ProductWithFeatures>
{
    /// <summary>
    /// Two products are equal when the names match and the feature sequences match
    /// element by element according to <c>FeatureComparer</c>.
    /// </summary>
    public bool Equals(ProductWithFeatures x, ProductWithFeatures y)
    {
        if (x.Name != y.Name)
        {
            return false;
        }

        return x.Features.SequenceEqual(y.Features, new LinqHomework.FeatureComparer());
    }

    /// <summary>
    /// Starts from the hash of the name and folds in the hash of every feature, in order.
    /// </summary>
    public int GetHashCode(ProductWithFeatures obj)
    {
        int result = obj.Name.GetHashCode();
        var hasher = new LinqHomework.FeatureComparer();
        foreach (var item in obj.Features)
        {
            int featureHash = hasher.GetHashCode(item);
            result = result * 23 + featureHash;
        }

        return result;
    }
}
This is a simple approach, which can be improved in a number of ways.
First, let's give our FeatureComparer a Default property, so we don't need to keep creating new instances:
// FeatureComparer extended with a shared instance so callers do not need to create their own.
public class FeatureComparer : IEqualityComparer<Feature>
{
// Single shared instance, created once when the type is initialized.
public static FeatureComparer Default { get; } = new FeatureComparer();
// ... as before
}
This lets us write:
/// <summary>
/// Compares products by name and by their features, in order, using the shared
/// <c>FeatureComparer.Default</c> instance.
/// </summary>
public class ProductComparer : IEqualityComparer<LinqHomework.ProductWithFeatures>
{
    /// <summary>
    /// Two products are equal when the names match and the feature sequences match
    /// element by element.
    /// </summary>
    public bool Equals(ProductWithFeatures x, ProductWithFeatures y)
    {
        if (x.Name != y.Name)
        {
            return false;
        }

        return x.Features.SequenceEqual(y.Features, LinqHomework.FeatureComparer.Default);
    }

    /// <summary>
    /// Starts from the hash of the name and folds in the hash of every feature, in order.
    /// </summary>
    public int GetHashCode(ProductWithFeatures obj)
    {
        int result = obj.Name.GetHashCode();
        foreach (var item in obj.Features)
        {
            int featureHash = LinqHomework.FeatureComparer.Default.GetHashCode(item);
            result = result * 23 + featureHash;
        }

        return result;
    }
}
We're also not handling the case where our methods are passed null, or where a product's Name or Features is null, so let's deal with those. We can also test whether x and y are the same object in Equals.
We'll also do the integer operations in an unchecked block in case it overflows (and the assembly is compiled with /checked).
Note that we use ReferenceEquals instead of ==, in case you end up implementing the == operator in your types.
/// <summary>
/// Null-safe comparer for products: two products are equal when they have the same name
/// and the same features in the same order.
/// </summary>
public class ProductComparer : IEqualityComparer<LinqHomework.ProductWithFeatures>
{
    /// <summary>
    /// Compares two products. ReferenceEquals is used instead of == so that the result
    /// does not change if the types later define their own == operator.
    /// </summary>
    public bool Equals(ProductWithFeatures x, ProductWithFeatures y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
        {
            return false;
        }

        if (x.Name != y.Name)
        {
            return false;
        }

        if (ReferenceEquals(x.Features, y.Features))
        {
            return true;
        }

        if (ReferenceEquals(x.Features, null) || ReferenceEquals(y.Features, null))
        {
            return false;
        }

        // The LINQ method is SequenceEqual (no trailing 's').
        return x.Features.SequenceEqual(y.Features, LinqHomework.FeatureComparer.Default);
    }

    /// <summary>
    /// Combines the hash of the name with the hash of every feature, in order.
    /// A null product hashes to 0; a null name or null feature collection contributes nothing.
    /// </summary>
    public int GetHashCode(ProductWithFeatures obj)
    {
        if (ReferenceEquals(obj, null))
        {
            return 0;
        }

        // unchecked: the multiplication is expected to wrap, even when compiled with /checked.
        unchecked
        {
            int hash = obj.Name?.GetHashCode() ?? 0;
            if (!ReferenceEquals(obj.Features, null))
            {
                foreach (var feature in obj.Features)
                {
                    hash = hash * 23 + LinqHomework.FeatureComparer.Default.GetHashCode(feature);
                }
            }

            // Returned on every path, including when Features is null.
            return hash;
        }
    }
}
It's really up to you. I personally would go for something like
/// <summary>
/// Builds a string from the product name and the Id of every feature (in order) and
/// returns the hash of that string.
/// </summary>
/// <param name="obj">The product to hash; a null product hashes to 0.</param>
public int GetHashCode( ProductWithFeatures obj )
{
    if( obj == null )
        return 0;

    var toHash = new System.Text.StringBuilder( obj.Name );
    if( obj.Features != null )
    {
        foreach( var feature in obj.Features )
        {
            // Hash the Id rather than feature.GetHashCode(): Feature does not override
            // GetHashCode, so two features with the same Id would otherwise hash differently.
            // The separator keeps ("a1", [2]) apart from ("a", [12]).
            toHash.Append( '|' );
            if( feature != null )
                toHash.Append( feature.Id );
        }
    }

    return toHash.ToString().GetHashCode();
}
It's not the nicest code ever, but it does what it's supposed to do.

How to enforce class types when comparing their int id properties

I have several classes with id property of the same type int?:
/// <summary>A person, identified by an optional integer id.</summary>
public class Person
{
    /// <summary>Identifier of the person; may be null.</summary>
    public int? id { get; set; }
}
/// <summary>A project, identified by an optional integer id.</summary>
public class Project
{
    /// <summary>Identifier of the project; may be null.</summary>
    public int? id { get; set; }
}
// etc...
When writing code it happened that I compared semantically wrong types:
if (person.id == project.id), and of course there was no warning until I found the bug.
How could I create some kind of underlying type enforcement, or even better, a compiler warning, or something like that, that warns me not everything looks o.k.?
I can think of creating an Equals(Person p) { return p.id == this.id } but I'd prefer some other mechanism that could be used more 'freely'.
You need to override Equals and GetHashCode to be able to compare objects directly.
Try like this:
/// <summary>
/// Immutable person with value equality based on its id. Because == and != are declared
/// for (Person, Person) only, comparing a Person with another type does not use them.
/// </summary>
public sealed class Person : IEquatable<Person>
{
    private readonly int? _id;

    /// <summary>Creates a person with the given (possibly null) id.</summary>
    public Person(int? id)
    {
        _id = id;
    }

    /// <summary>The id supplied at construction.</summary>
    public int? Id
    {
        get { return _id; }
    }

    /// <summary>Two null references are equal; otherwise defers to <see cref="Equals(Person)"/>.</summary>
    public static bool operator ==(Person left, Person right)
    {
        return object.ReferenceEquals(left, null)
            ? object.ReferenceEquals(right, null)
            : left.Equals(right);
    }

    public static bool operator !=(Person left, Person right)
    {
        bool same = left == right;
        return !same;
    }

    /// <summary>True when <paramref name="obj"/> is a Person with the same id.</summary>
    public override bool Equals(object obj)
    {
        Person candidate = obj as Person;
        return !object.ReferenceEquals(candidate, null) && Equals(candidate);
    }

    /// <summary>True when <paramref name="obj"/> is non-null and has the same id.</summary>
    public bool Equals(Person obj)
    {
        if (object.ReferenceEquals(obj, null))
        {
            return false;
        }

        return EqualityComparer<int?>.Default.Equals(_id, obj._id);
    }

    /// <summary>Hash of the id, consistent with <see cref="Equals(Person)"/>.</summary>
    public override int GetHashCode()
    {
        return EqualityComparer<int?>.Default.GetHashCode(_id);
    }

    public override string ToString()
    {
        return string.Format("{{ Id = {0} }}", _id);
    }
}
/// <summary>
/// Immutable project with value equality based on its id. Because == and != are declared
/// for (Project, Project) only, comparing a Project with another type does not use them.
/// </summary>
public sealed class Project : IEquatable<Project>
{
    private readonly int? _id;

    /// <summary>Creates a project with the given (possibly null) id.</summary>
    public Project(int? id)
    {
        _id = id;
    }

    /// <summary>The id supplied at construction.</summary>
    public int? Id
    {
        get { return _id; }
    }

    /// <summary>Two null references are equal; otherwise defers to <see cref="Equals(Project)"/>.</summary>
    public static bool operator ==(Project left, Project right)
    {
        return object.ReferenceEquals(left, null)
            ? object.ReferenceEquals(right, null)
            : left.Equals(right);
    }

    public static bool operator !=(Project left, Project right)
    {
        bool same = left == right;
        return !same;
    }

    /// <summary>True when <paramref name="obj"/> is a Project with the same id.</summary>
    public override bool Equals(object obj)
    {
        Project candidate = obj as Project;
        return !object.ReferenceEquals(candidate, null) && Equals(candidate);
    }

    /// <summary>True when <paramref name="obj"/> is non-null and has the same id.</summary>
    public bool Equals(Project obj)
    {
        if (object.ReferenceEquals(obj, null))
        {
            return false;
        }

        return EqualityComparer<int?>.Default.Equals(_id, obj._id);
    }

    /// <summary>Hash of the id, consistent with <see cref="Equals(Project)"/>.</summary>
    public override int GetHashCode()
    {
        return EqualityComparer<int?>.Default.GetHashCode(_id);
    }

    public override string ToString()
    {
        return string.Format("{{ Id = {0} }}", _id);
    }
}
I also implemented IEquatable<Person> and == and != for good measure.
Now you can write person1 == this if this is a Person, but you would have a compiler error if this were a Project.
This is what tests are for. This is why you should write tests. Tests should pick up on these kinds of errors.
But if you really want to go overkill, create a custom struct to store your IDs:
/// <summary>
/// Strongly typed id: the type parameter records which entity the id belongs to, so an
/// <c>Id&lt;Person&gt;</c> cannot be compared with an <c>Id&lt;Project&gt;</c> by accident.
/// </summary>
/// <typeparam name="T">The entity type this id identifies; used only as a compile-time tag.</typeparam>
public struct Id<T> : IEquatable<Id<T>> {
    /// <summary>The underlying (possibly null) integer id.</summary>
    public int? ID { get; }

    /// <summary>Allows a plain int to be assigned where an <c>Id&lt;T&gt;</c> is expected.</summary>
    public static implicit operator Id<T>(int id) {
        return new Id<T>(id);
    }

    public Id(int? id) { ID = id; }

    /// <summary>True when both ids wrap the same value (two null ids are equal).</summary>
    public bool Equals(Id<T> other) {
        return ID == other.ID;
    }

    // Equals/GetHashCode are overridden so that they agree with the == and != operators
    // and so that ids behave correctly as dictionary keys or set members.
    public override bool Equals(object obj) {
        return obj is Id<T> && Equals((Id<T>)obj);
    }

    public override int GetHashCode() {
        // Nullable<int>.GetHashCode() returns 0 when there is no value.
        return ID.GetHashCode();
    }

    public static bool operator ==(Id<T> lhs, Id<T> rhs) {
        return lhs.ID == rhs.ID;
    }

    public static bool operator !=(Id<T> lhs, Id<T> rhs) {
        return lhs.ID != rhs.ID;
    }
}
// usage:
/// <summary>A person whose id is tagged with the Person type.</summary>
public class Person
{
    /// <summary>Identifier that can only be compared with other person ids.</summary>
    public Id<Person> Id { get; set; }
}
/// <summary>A project whose id is tagged with the Project type.</summary>
public class Project
{
    /// <summary>Identifier that can only be compared with other project ids.</summary>
    public Id<Project> Id { get; set; }
}
Whenever you try to compare Person.Id with Project.Id, the compiler will give you an error because you are comparing Id<Project> and Id<Person>.

How to generate a unique hash for a collection of objects independent of their order [duplicate]

This question already has answers here:
Getting hash of a list of strings regardless of order
(5 answers)
Closed 8 years ago.
Let's say I have a class
/// <summary>An item described by a type string and an integer id.</summary>
public class MyClass
{
    /// <summary>Type of the item.</summary>
    public string Type { get; set; }

    /// <summary>Id of the item.</summary>
    public int Id { get; set; }
}
and I have a collection class that is simply a strongly typed List
/// <summary>A strongly typed list of <see cref="MyClass"/> items.</summary>
public class MyClassList : List<MyClass>
{
    /// <summary>Creates the list and copies in the items of <paramref name="enumerable"/>.</summary>
    public MyClassList(IEnumerable<MyClass> enumerable)
        : base(enumerable)
    {
    }
}
I want MyClassList to be able to generate a unique hash-code for MyClassList based on the contents. The hash-code of MyClass should be based on both properties. The hash-code of MyClassList should be the same even if the order of the objects is different.
To handle the ordering issue I was thinking I could order the list before generating the hash-code, but I'm not sure how to generate the hash-code of the list.
For optimal performance I would try to avoid iterating the whole collection every time GetHashCode is called. The purpose of GetHashCode is to improve performance to a point better than evaluating every element. So I might try maintaining the hash code when elements in the list are changed like this.
/// <summary>
/// Demo: prints the hash codes of three lists, then the hash of the third list again
/// after a further element has been added to it.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // l and l2 hold the same two elements in opposite order.
        var l = new MyClassList();
        l.Add(new MyClass { Type = "Bob", Id = 1 });
        l.Add(new MyClass { Type = "Jones", Id = 2 });

        var l2 = new MyClassList();
        l2.Add(new MyClass { Type = "Jones", Id = 2 });
        l2.Add(new MyClass { Type = "Bob", Id = 1 });

        // l3 starts with only one of the two elements.
        var l3 = new MyClassList();
        l3.Add(new MyClass { Type = "Jones", Id = 2 });

        Console.WriteLine("{0} {1} {2}", l.GetHashCode(), l2.GetHashCode(), l3.GetHashCode());

        // After adding the missing element, l3 holds the same elements as l and l2.
        l3.Add(new MyClass { Type = "Bob", Id = 1 });
        Console.WriteLine("{0}", l3.GetHashCode());
    }
}
/// <summary>
/// An item whose hash code is derived from both <see cref="Type"/> and <see cref="Id"/>.
/// </summary>
/// <remarks>
/// Only GetHashCode is overridden here; Equals is left as reference equality.
/// </remarks>
public class MyClass
{
    public string Type { get; set; }

    public int Id { get; set; }

    /// <summary>
    /// Combines the hash of Type with the full value of Id. A null Type contributes 0.
    /// All bits of Id take part, so items that differ only by a small Id (for example
    /// 1 and 2) get different hash codes.
    /// </summary>
    public override int GetHashCode()
    {
        // unchecked: the multiplication is expected to wrap, even when compiled with /checked.
        unchecked
        {
            int typeHash = Type == null ? 0 : Type.GetHashCode();
            return (typeHash * 397) ^ Id;
        }
    }
}
/// <summary>
/// A list of <see cref="MyClass"/> that keeps a running, order-independent hash code of its
/// contents, so that GetHashCode does not have to visit every element.
/// </summary>
/// <remarks>
/// The running hash is the wrapping sum of the element hash codes. Unlike XOR, a sum does not
/// cancel out when the same element is stored twice. The cached value is only correct as long
/// as an element's GetHashCode() result does not change while the element is in the list.
/// </remarks>
public class MyClassList : IList<MyClass>
{
    List<MyClass> internalList;
    int hashCode = 0;

    public MyClassList()
    {
        internalList = new List<MyClass>();
    }

    // Hash contribution of one element; a null element contributes 0 instead of throwing
    // after the underlying list has already been modified.
    private static int HashOf(MyClass item)
    {
        return item == null ? 0 : item.GetHashCode();
    }

    // Adds the element's hash to the running total (wrapping on overflow).
    private void IncludeInHash(MyClass item)
    {
        unchecked
        {
            hashCode += HashOf(item);
        }
    }

    // Exact inverse of IncludeInHash.
    private void ExcludeFromHash(MyClass item)
    {
        unchecked
        {
            hashCode -= HashOf(item);
        }
    }

    /// <summary>Returns the cached hash of the current contents.</summary>
    public override int GetHashCode()
    {
        return hashCode;
    }

    public int IndexOf(MyClass item)
    {
        return internalList.IndexOf(item);
    }

    public void Insert(int index, MyClass item)
    {
        internalList.Insert(index, item);
        // Make sure Insert is successful (doesn't throw an exception) before affecting the hash
        IncludeInHash(item);
    }

    public void RemoveAt(int index)
    {
        MyClass reduce = internalList[index];
        internalList.RemoveAt(index);
        // Make sure RemoveAt is successful before affecting the hash
        ExcludeFromHash(reduce);
    }

    public MyClass this[int index]
    {
        get
        {
            return internalList[index];
        }
        set
        {
            MyClass reduce = internalList[index];
            internalList[index] = value;
            // The assignment above succeeded; neither hash update below can throw.
            ExcludeFromHash(reduce);
            IncludeInHash(value);
        }
    }

    public void Add(MyClass item)
    {
        internalList.Add(item);
        IncludeInHash(item);
    }

    public void Clear()
    {
        internalList.Clear();
        hashCode = 0;
    }

    public bool Contains(MyClass item)
    {
        return internalList.Contains(item);
    }

    public void CopyTo(MyClass[] array, int arrayIndex)
    {
        internalList.CopyTo(array, arrayIndex);
    }

    public int Count
    {
        get { return internalList.Count; }
    }

    public bool IsReadOnly
    {
        get { return false; }
    }

    public bool Remove(MyClass item)
    {
        if (!internalList.Remove(item))
        {
            return false;
        }

        ExcludeFromHash(item);
        return true;
    }

    public IEnumerator<MyClass> GetEnumerator()
    {
        return internalList.AsReadOnly().GetEnumerator();
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
The solution given by clto works. Here is an alternative: sort the list by some total ordering (any ordering will do, as long as it is unambiguous). Then you can calculate the hash code using any normal means. You don't need order-independence. You could even use a cryptographic hash function.
I propose this solution (I didn't implement the Equals method) :
/// <summary>
/// An item whose hash code is derived from both <see cref="Type"/> and <see cref="Id"/>.
/// </summary>
public class MyClass
{
    public string Type { get; set; }

    public int Id { get; set; }

    /// <summary>
    /// Folds Type and Id into the hash one after the other (multiply, then add), so that the
    /// two properties are not interchangeable. A null Type contributes 0.
    /// </summary>
    public override int GetHashCode()
    {
        // unchecked: the multiplication is expected to wrap, even when compiled with /checked.
        unchecked
        {
            int hash = 17;
            hash = hash * 23 + (this.Type == null ? 0 : this.Type.GetHashCode());
            hash = hash * 23 + this.Id.GetHashCode();
            return hash;
        }
    }
}
/// <summary>
/// A list of <see cref="MyClass"/> whose hash code depends on its contents but not on the
/// order in which the items are stored.
/// </summary>
public class MyClassList : List<MyClass>
{
    public MyClassList(IEnumerable<MyClass> enumerable) : base(enumerable) { }

    /// <summary>
    /// Sorts the element hash codes before folding them together, so that two lists with the
    /// same elements in a different order produce the same value. Null elements contribute 0.
    /// </summary>
    public override int GetHashCode()
    {
        return this
            .Select(item => item == null ? 0 : item.GetHashCode())
            .OrderBy(itemHash => itemHash)
            // unchecked: the multiplication is expected to wrap, even when compiled with /checked.
            .Aggregate(17, (state, itemHash) => unchecked(state * 23 + itemHash));
    }
}
The way the hash code is generated is inspired by the method Microsoft uses to compute the hash value for anonymous objects.
If the order isn't important then you should use a collection that inherently is a set, rather than a list.
Also, it's generally best to not inherit from collections; use composition instead.
So for a collection you can use a HashSet, as it will have set semantics.
To have MyClass use both properties as its identity, just override its Equals and GetHashCode implementations, or create an IEqualityComparer<MyClass> if you can't or don't want to do that.
/// <summary>
/// An item with value equality: two instances are equal when both <see cref="Type"/> and
/// <see cref="Id"/> are equal.
/// </summary>
public class MyClass : IEquatable<MyClass>
{
    public string Type { get; set; }

    public int Id { get; set; }

    public override bool Equals(object obj)
    {
        return Equals(obj as MyClass);
    }

    /// <summary>True when <paramref name="other"/> is non-null and has the same Type and Id.</summary>
    public bool Equals(MyClass other)
    {
        if (other == null)
        {
            return false;
        }

        return Type == other.Type &&
               Id == other.Id;
    }

    /// <summary>
    /// Combines the hash of Type with Id. A null Type contributes 0, so every instance that
    /// Equals can compare can also be hashed.
    /// </summary>
    public override int GetHashCode()
    {
        // unchecked: the multiplication is expected to wrap, even when compiled with /checked.
        unchecked
        {
            int typeHash = Type == null ? 0 : Type.GetHashCode();
            return typeHash * 79 + Id;
        }
    }
}
Then your collection is as simple as:
// An empty set of MyClass; membership is decided by MyClass's own equality and hash code.
var set = new HashSet<MyClass>();
And if you want to compare various sets just use:
// Creates a comparer that compares whole HashSet<MyClass> instances with each other.
// The returned comparer is not stored here; assign it to a variable to use it.
HashSet<MyClass>.CreateSetComparer();

Custom object using Except failing to use IEqualityComparer<T>

here is the object code:
// Item identified by its Id. The class itself implements IEqualityComparer<DlpItem>.
public class DlpItem : IEqualityComparer<DlpItem>
{
public string Text { get; set; }
public int Id { get; set; }
// Creates an item with the given id and an empty Text.
public DlpItem(int pId)
{
Text = string.Empty;
Id = pId;
}
// Compares by Id. The result of the 'as' cast is dereferenced without a null check.
public override bool Equals(object obj)
{
return Id == (obj as DlpItem).Id;
}
// IEqualityComparer<DlpItem>.Equals: compares the Ids of the two arguments.
public bool Equals(DlpItem a, DlpItem b)
{
return a.Id == b.Id;
}
// IEqualityComparer<DlpItem>.GetHashCode: this is not an override of object.GetHashCode().
// It ignores the 'item' argument and hashes the Id of the current instance instead.
public int GetHashCode(DlpItem item)
{
return Id.GetHashCode();
}
}
And I have two lists as follows:
// Two lists holding items with the same ids (1 and 2), as separate instances.
var list1 = new List<DlpItem> { new DlpItem(1), new DlpItem(2) };
var list2 = new List<DlpItem> { new DlpItem(1), new DlpItem(2) };

// Items of list1 that are not in list2, using the default equality of DlpItem.
var delItems = list1.Except(list2).ToList();
delItems always has both items in it. What am I missing here?
EDIT: Code now implements IEquatable
// Revised item: implements IEquatable<DlpItem> in addition to IEqualityComparer<DlpItem>.
public class DlpItem : IEqualityComparer<DlpItem>, IEquatable<DlpItem>
{
public string Text { get; set; }
public int Id { get; set; }
// Compares by Id. The result of the 'as' cast is dereferenced without a null check.
public override bool Equals(object obj)
{
return Id - (obj as DlpItem).Id == 0;
}
// IEqualityComparer<DlpItem>.Equals: compares the Ids of the two arguments.
public bool Equals(DlpItem a, DlpItem b)
{
return a.Id == b.Id;
}
// IEquatable<DlpItem>.Equals: null-safe comparison by Id.
public bool Equals(DlpItem item)
{
return item != null && Id == item.Id;
}
// IEqualityComparer<DlpItem>.GetHashCode: this is not an override of object.GetHashCode(),
// which this class does not override. It ignores 'item' and hashes the current instance's Id.
public int GetHashCode(DlpItem item)
{
return Id.GetHashCode();
}
}
In your example, you don't actually add anything to list2... a simple enough mistake, but there is a more significant issue:
It needs to be IEquatable<T> not an IEqualityComparer<T>; also, you might want to ensure the hashcode can't change; most simply by making Id read-only:
/// <summary>
/// Item with value equality on its read-only <see cref="Id"/>. Because the id cannot
/// change after construction, the hash code of an instance is stable.
/// </summary>
public class DlpItem : IEquatable<DlpItem>
{
    private readonly int id;

    /// <summary>Creates an item with the given id and an empty <see cref="Text"/>.</summary>
    public DlpItem(int id)
    {
        this.id = id;
        Text = "";
    }

    /// <summary>Free text attached to the item; not part of its identity.</summary>
    public string Text { get; set; }

    /// <summary>The id supplied at construction.</summary>
    public int Id
    {
        get { return id; }
    }

    /// <summary>True when <paramref name="other"/> is non-null and has the same id.</summary>
    public bool Equals(DlpItem other)
    {
        if (other == null)
        {
            return false;
        }

        return Id == other.Id;
    }

    /// <summary>True when <paramref name="obj"/> is a DlpItem with the same id.</summary>
    public override bool Equals(object obj)
    {
        DlpItem candidate = obj as DlpItem;
        return Equals(candidate);
    }

    /// <summary>Hash of the id, consistent with <see cref="Equals(DlpItem)"/>.</summary>
    public override int GetHashCode()
    {
        return Id.GetHashCode();
    }
}

Categories

Resources