Performance Benchmarking of Contains, Exists and Any

Performance Benchmarking of Contains, Exists and Any - c#

I have been searching for a performance benchmarking between Contains, Exists and Any methods available in the List<T>. I wanted to find this out just out of curiosity as I was always confused among these. Many questions on SO described definitions of these methods such as:
LINQ Ring: Any() vs Contains() for Huge Collections
Linq .Any VS .Exists - Whats the difference?
LINQ extension methods - Any() vs. Where() vs. Exists()
So I decided to do it myself. I am adding it as an answer. Any more insight on the results is most welcomed. I also did this benchmarking for arrays to see the results

The fastest way is to use a HashSet.
The Contains for a HashSet is O(1).
I took your code and added a benchmark for HashSet<int>
The performance cost of HashSet<int> set = new HashSet<int>(list); is nearly zero.
Code
// Entry point: runs the three benchmark scenarios in order of collection size,
// from the 10-element one up to the 6,000,000-element one.
void Main()
{
ContainsExistsAnyVeryShort();
ContainsExistsAnyShort();
ContainsExistsAny();
}
/// <summary>
/// Benchmarks the lookup methods against 6,000,000 random values and reports milliseconds.
/// </summary>
private static void ContainsExistsAny()
{
    Console.WriteLine("***************************************");
    Console.WriteLine("********* ContainsExistsAny ***********");
    Console.WriteLine("***************************************");
    RunScenario(6000000, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");
}

/// <summary>
/// Benchmarks the lookup methods against 2,000 random values and reports milliseconds.
/// </summary>
private static void ContainsExistsAnyShort()
{
    Console.WriteLine("***************************************");
    Console.WriteLine("***** ContainsExistsAnyShortRange *****");
    Console.WriteLine("***************************************");
    RunScenario(2000, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");
}

/// <summary>
/// Benchmarks the lookup methods against 10 random values. Reports raw stopwatch ticks
/// instead of milliseconds.
/// </summary>
private static void ContainsExistsAnyVeryShort()
{
    Console.WriteLine("*******************************************");
    Console.WriteLine("***** ContainsExistsAnyVeryShortRange *****");
    Console.WriteLine("*******************************************");
    RunScenario(10, (method, stopwatch) => $"{method}: {stopwatch.ElapsedTicks} ticks");
}

/// <summary>
/// Builds the three collections shared by every scenario (list, array copy, hash set)
/// and hands them to <c>find</c>. Previously this code was duplicated in each scenario.
/// </summary>
/// <param name="itemCount">Number of random values to generate.</param>
/// <param name="format">Turns a method label and its stopwatch into the line to print.</param>
private static void RunScenario(int itemCount, Func<string, Stopwatch, string> format)
{
    List<int> list = new List<int>(itemCount);
    Random random = new Random();
    for (int i = 0; i < itemCount; i++)
    {
        // Values are always drawn from [0, 6000000), regardless of the collection size.
        list.Add(random.Next(6000000));
    }
    int[] arr = list.ToArray();
    // The set is built from the list, so duplicates in the list collapse to one entry.
    HashSet<int> set = new HashSet<int>(list);
    find(list, arr, set, format);
}
/// <summary>
/// Looks up 10,000 random values with each membership API in turn and prints one
/// formatted timing line per API.
/// </summary>
/// <param name="list">List to search.</param>
/// <param name="arr">Array to search.</param>
/// <param name="set">Hash set to search.</param>
/// <param name="format">Turns a label and the stopped stopwatch into the line to print.</param>
private static void find(List<int> list, int[] arr, HashSet<int> set, Func<string, Stopwatch, string> format)
{
    // Probe values are generated up front so that random number generation is not timed.
    Random rng = new Random();
    int[] probes = new int[10000];
    for (int k = 0; k < probes.Length; k++)
    {
        probes[k] = rng.Next(6000000);
    }

    // A single stopwatch is restarted for every measurement.
    Stopwatch timer = new Stopwatch();

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        list.Contains(probes[p]);
    }
    timer.Stop();
    Console.WriteLine(format("List/Contains", timer));

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        list.Exists(item => item == probes[p]);
    }
    timer.Stop();
    Console.WriteLine(format("List/Exists", timer));

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        list.Any(item => item == probes[p]);
    }
    timer.Stop();
    Console.WriteLine(format("List/Any", timer));

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        arr.Contains(probes[p]);
    }
    timer.Stop();
    Console.WriteLine(format("Array/Contains", timer));

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        Array.Exists(arr, item => item == probes[p]);
    }
    timer.Stop();
    Console.WriteLine(format("Array/Exists", timer));

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        Array.IndexOf(arr, probes[p]);
    }
    timer.Stop();
    Console.WriteLine(format("Array/IndexOf", timer));

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        arr.Any(item => item == probes[p]);
    }
    timer.Stop();
    Console.WriteLine(format("Array/Any", timer));

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        set.Contains(probes[p]);
    }
    timer.Stop();
    Console.WriteLine(format("HashSet/Contains", timer));
}
RESULTS
*******************************************
***** ContainsExistsAnyVeryShortRange *****
*******************************************
List/Contains: 1067 ticks
List/Exists: 2884 ticks
List/Any: 10520 ticks
Array/Contains: 1880 ticks
Array/Exists: 5526 ticks
Array/IndexOf: 601 ticks
Array/Any: 13295 ticks
HashSet/Contains: 6629 ticks
***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 4ms
List/Exists: 28ms
List/Any: 138ms
Array/Contains: 6ms
Array/Exists: 34ms
Array/IndexOf: 3ms
Array/Any: 96ms
HashSet/Contains: 0ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 11504ms
List/Exists: 57084ms
List/Any: 257659ms
Array/Contains: 11643ms
Array/Exists: 52477ms
Array/IndexOf: 11741ms
Array/Any: 194184ms
HashSet/Contains: 3ms
Edit (2021-08-25)
I added a comparison for very short collections (10 items) and also added Array.Contains and Array.IndexOf. You can see that Array.IndexOf is the fastest for such small ranges. That is because, as #lucky-brian said, n is so small here that a simple for-loop performs better than a somewhat complex search algorithm. However, I still advise using HashSet<T> whenever possible, as it better reflects the intent of only having unique values, and the difference for small collections is below 1ms.

According to documentation:
List.Exists (Object method)
Determines whether the List(T) contains elements that match the
conditions defined by the specified predicate.
IEnumerable.Any (Extension method)
Determines whether any element of a sequence satisfies a condition.
List.Contains (Object Method)
Determines whether an element is in the List.
Benchmarking:
CODE:
// Entry point: runs the 2,000-element scenario first, then the 6,000,000-element one.
static void Main(string[] args)
{
ContainsExistsAnyShort();
ContainsExistsAny();
}
/// <summary>
/// Prints the scenario banner, generates 6,000,000 random values and benchmarks the
/// lookups against both a list and an array holding the same values.
/// </summary>
private static void ContainsExistsAny()
{
    const int ItemCount = 6000000;
    const int ValueRange = 6000000;

    string[] banner =
    {
        "***************************************",
        "********* ContainsExistsAny ***********",
        "***************************************",
    };
    foreach (string bannerLine in banner)
    {
        Console.WriteLine(bannerLine);
    }

    // Fill the array first, then copy it into the list; both end up with identical contents.
    Random rng = new Random();
    int[] arr = new int[ItemCount];
    for (int slot = 0; slot < arr.Length; slot++)
    {
        arr[slot] = rng.Next(ValueRange);
    }
    List<int> list = new List<int>(arr);

    find(list, arr);
}
/// <summary>
/// Prints the scenario banner, generates 2,000 random values (still drawn from
/// [0, 6000000)) and benchmarks the lookups against a list and an array of them.
/// </summary>
private static void ContainsExistsAnyShort()
{
    const int ItemCount = 2000;
    const int ValueRange = 6000000;

    Console.WriteLine("***************************************");
    Console.WriteLine("***** ContainsExistsAnyShortRange *****");
    Console.WriteLine("***************************************");

    Random rng = new Random();
    int[] arr = new int[ItemCount];
    int slot = 0;
    while (slot < ItemCount)
    {
        arr[slot] = rng.Next(ValueRange);
        slot++;
    }

    // The list is a copy of the array, so both collections hold the same sequence.
    List<int> list = new List<int>(arr);
    find(list, arr);
}
/// <summary>
/// Looks up 10,000 random values with each membership API on the list and the array,
/// printing the elapsed milliseconds for each API.
/// </summary>
/// <param name="list">List to search.</param>
/// <param name="arr">Array to search.</param>
private static void find(List<int> list, int[] arr)
{
    // Probe values are generated before any timing starts.
    var rng = new Random();
    var probes = new int[10000];
    for (int k = 0; k < probes.Length; k++)
    {
        probes[k] = rng.Next(6000000);
    }

    var timer = new Stopwatch();

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        list.Contains(probes[p]);
    }
    timer.Stop();
    Console.WriteLine("List/Contains: {0:N0}ms", timer.ElapsedMilliseconds);

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        list.Exists(item => item == probes[p]);
    }
    timer.Stop();
    Console.WriteLine("List/Exists: {0:N0}ms", timer.ElapsedMilliseconds);

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        list.Any(item => item == probes[p]);
    }
    timer.Stop();
    Console.WriteLine("List/Any: {0:N0}ms", timer.ElapsedMilliseconds);

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        arr.Contains(probes[p]);
    }
    timer.Stop();
    Console.WriteLine("Array/Contains: {0:N0}ms", timer.ElapsedMilliseconds);

    // Printed outside any timed region.
    Console.WriteLine("Arrays do not have Exists");

    timer.Restart();
    for (int p = 0; p < probes.Length; p++)
    {
        arr.Any(item => item == probes[p]);
    }
    timer.Stop();
    Console.WriteLine("Array/Any: {0:N0}ms", timer.ElapsedMilliseconds);
}
RESULTS
***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 96ms
List/Exists: 146ms
List/Any: 381ms
Array/Contains: 34ms
Arrays do not have Exists
Array/Any: 410ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 257,996ms
List/Exists: 379,951ms
List/Any: 884,853ms
Array/Contains: 72,486ms
Arrays do not have Exists
Array/Any: 1,013,303ms

It is worth mentioning that this comparison is a bit unfair, since the Array class doesn't own the Contains() method. It uses an extension method for IEnumerable<T> via a sequential Enumerator, hence it is not optimized for Array instances. On the other side, HashSet<T> has its own implementation fully optimized for all sizes.
To compare fairly you could use the static method int Array.IndexOf(), which is implemented for Array instances, even though it uses a for loop that is only slightly more efficient than an Enumerator.
Using a fair comparison algorithm, the performance for small sets of up to 5 elements of HashSet<T>.Contains() is similar to the Array.IndexOf() but it is much more efficient for larger sets.

Related

Stopwatch startup issues

I've got a C# program that tests a sort algorithm and its performance by using an instance of the Stopwatch class.
So far everything is working correctly and I am getting the expected tick results except in the first run. Somehow the Stopwatch class needs about 900 ticks longer for the first calculation.
Do I have to initiate the Stopwatch class somehow different or is there any way to fix this?
// Runs TestSort ten times for each array size (100, 1000, 10000) and prints the
// collected tick counts, one row per size.
// NOTE(review): the very first TestSort call presumably also pays one-time start-up
// costs (e.g. JIT compilation of the sort routine) - confirm with a warm-up call.
static void Main() {
    int[] sizes = { 100, 1000, 10000 };
    const int runsPerSize = 10; // Amount of arrays to test per size

    watch = new Stopwatch();

    // ticks[row, run] holds the measurement for sizes[row] on the given run
    long[,] ticks = new long[sizes.Length, runsPerSize];
    for (int row = 0; row < sizes.Length; row++) {
        for (int run = 0; run < runsPerSize; run++) {
            ticks[row, run] = TestSort(sizes[row]);
        }
    }

    PrintStats(ticks);
}
// Sorts a freshly generated random array of the given length and returns the
// stopwatch ticks spent inside sort(); array generation is not timed.
public static long TestSort(int length) {
    int[] unsorted = GenerateRandomArray(length);

    // Restart() clears the previous measurement and starts timing in one call.
    watch.Restart();
    sort(unsorted);
    watch.Stop();

    long elapsedTicks = watch.ElapsedTicks;
    return elapsedTicks;
}
// Writes every row of the two-dimensional array to the console as "[a,b,c]"
// followed by a line feed.
public static void PrintStats(long[,] array) {
    int rowCount = array.GetLength(0);
    int columnCount = array.GetLength(1);

    for (int row = 0; row < rowCount; row++) {
        // Render the cells of this row first, then emit the whole line at once.
        string[] cells = new string[columnCount];
        for (int column = 0; column < columnCount; column++) {
            cells[column] = array[row, column].ToString();
        }
        Console.Write("[" + string.Join(",", cells) + "]\n");
    }
}
// Sample output
// Note that first entry is about 900 ticks longer than the other ones with size 100
[1150,256,268,262,261,262,263,261,263,262]
[19689,20550,20979,22953,19913,20578,19693,19945,19811,19970]
[1880705,1850265,3006533,1869953,1900301,1846915,1840681,1801887,1931206,2206952]

Ways to Improve generic Dictionary performance

I have a Dictionary<int, int> which is populated with ~5 million records.
While the performance is reasonably good considering the volume of data I'm looking to improve it. I don't care about data population my main concern is data retrieval.
First thing I'd done - I changed value type from decimal to int which got me twice better performance.
Then I tried trading 'genericness' for speed by passing non-generic IntComparer into Dictionary's ctor as follows:
/// <summary>
/// Equality comparer for <see cref="int"/> keys: two values are equal when they are the
/// same number, and a value is its own hash code.
/// </summary>
public class IntegerComparer : IEqualityComparer<int>
{
    /// <summary>Returns true when both integers have the same value.</summary>
    public bool Equals(int x, int y) => x == y;

    /// <summary>Returns the integer itself as its hash code.</summary>
    public int GetHashCode(int obj) => obj;
}
but to no avail, performance got degraded by 20%. SortedDictionary slowed things down by 10 times (didn't have much hope on it though). Wonder what can be done for improving the performance if any?
here's a synthetic test just for measuring performance:
// Setup (not timed): a 5,000,000-entry dictionary mapping i -> i + 5.
var d = new Dictionary<int, int>();
for (var i = 0; i < 5000000; i++)
{
d.Add(i, i + 5);
}
var r = new Random();
var s = new Stopwatch();
s.Start();
// Timed region: 100,000 rounds, each performing 255 dictionary lookups.
for (var i = 0; i < 100000; i++)
{
// r0 is a lazy LINQ query: no random keys are generated until it is enumerated below.
var r0 = Enumerable.Range(1, 255).Select(t => r.Next(5000000));
// ToList() drives both Select stages, so each round also pays for Random.Next, the LINQ
// iterators and a new list allocation - not only for the d[t] lookups being compared.
var values = r0.Select(t => d[t]).ToList();
}
s.Stop();
MessageBox.Show(s.ElapsedMilliseconds.ToString());

As the comments point out your test is seriously flawed...
If the highest index you will see is 5,000,000 then an array will be the most performant option. I've tried to quickly rewrite your test to try and eliminate some of the error. There will probably be mistakes; writing accurate benchmarks is hard.
/// <summary>
/// Compares 100,000,000 pseudo-random lookups in a Dictionary&lt;int, int&gt; against the
/// same lookups in a plain int[] and prints the elapsed milliseconds for each.
/// </summary>
static void Main(string[] args)
{
    var loopLength = 100000000;

    var d = new Dictionary<int, int>();
    for (var i = 0; i < 5000000; i++)
    {
        d.Add(i, i + 5);
    }
    // One lookup before timing so the indexer has been executed at least once.
    var ignore = d[7];

    var a = new int[5000000];
    for (var i = 0; i < 5000000; i++)
    {
        a[i] = i + 5;
    }
    ignore = a[7];

    // Every looked-up value is folded into this sink so the reads cannot be treated as dead code.
    long sink = 0;

    var s = new Stopwatch();
    var x = 1;
    s.Start();
    for (var i = 0; i < loopLength; i++)
    {
        // Linear congruential generator. The multiplication is meant to wrap around, so it is
        // marked unchecked explicitly and keeps working when the project enables overflow checks.
        // The mask keeps x in [0, 4194303], which is always a valid key/index (< 5,000,000).
        x = unchecked(x * 1664525 + 1013904223) & 4194303;
        sink += d[x];
    }
    s.Stop();
    Console.WriteLine(s.ElapsedMilliseconds);

    // Same key sequence again, this time against the array.
    s.Reset();
    x = 1;
    s.Start();
    for (var i = 0; i < loopLength; i++)
    {
        x = unchecked(x * 1664525 + 1013904223) & 4194303;
        sink += a[x];
    }
    s.Stop();
    Console.WriteLine(s.ElapsedMilliseconds);

    GC.KeepAlive(sink);
    Console.ReadKey(true);
}
x coefficients borrowed from Wikipedia's Linear congruential generator article
My results:
24390
2076
That makes the array over 12x faster

C# Array / List performance difference

Consider the following methods:
/// <summary>
/// Times filling an int array, "removing" size / div randomly chosen elements by shifting
/// the tail down with Array.Copy, and summing the elements that remain.
/// </summary>
/// <param name="size">Number of elements the array starts with.</param>
/// <returns>The stopped stopwatch covering fill, removals and summation.</returns>
static Stopwatch ArrayTest(int size)
{
    var arr = new int[size];
    Stopwatch stw = new Stopwatch();
    stw.Start();
    for (int i = 0; i < size; i++)
    {
        arr[i] = i / div;
    }
    var rnd = new Random(1);
    var sz2 = size / div;
    for (int i = 0; i < sz2; i++)
    {
        // sz is the number of live elements before this removal.
        var sz = size - i;
        var ix = rnd.Next(sz);
        // Shift only the live tail (ix + 1 .. sz - 1) down by one. The length used to be
        // size - ix - 1, which also copied the already-zeroed slots past sz on every
        // removal: same result, but needless extra copying.
        Array.Copy(arr, ix + 1, arr, ix, sz - ix - 1);
        arr[sz - 1] = 0;
    }
    // Checksum over the elements that are still live.
    double sum = 0.0;
    for (int i = 0; i < size - sz2; i++)
    {
        sum += arr[i];
    }
    stw.Stop();
    Console.Write(" Array: {0}", sum);
    return stw;
}
/// <summary>
/// Times filling a List&lt;int&gt;, removing size / div randomly chosen elements with
/// RemoveAt, and summing the elements that remain.
/// </summary>
/// <param name="size">Number of elements the list starts with.</param>
/// <returns>The stopped stopwatch covering fill, removals and summation.</returns>
static Stopwatch ListTest(int size)
{
    var values = new List<int>();
    Stopwatch timer = Stopwatch.StartNew();

    for (int n = 0; n < size; n++)
    {
        values.Add(n / div);
    }

    // Fixed seed: the same removal positions are chosen on every run.
    var picker = new Random(1);
    int removals = size / div;
    for (int done = 0; done < removals; done++)
    {
        int victim = picker.Next(values.Count);
        values.RemoveAt(victim);
    }

    // Checksum over what is left in the list.
    double total = 0.0;
    for (int pos = 0; pos < values.Count; pos++)
    {
        total += values[pos];
    }

    timer.Stop();
    Console.Write(" List: {0}", total);
    return timer;
}
div = 2 and size = 200000.
Running this (compiled in Release) produces the following:
Array: 5012641699 12.8367529 s
List: 5012641699 6.1027289 s
According to http://referencesource.microsoft.com/#mscorlib List.RemoveAt is implemented like this:
// Removes the element at the given index and decreases the size of the list by one.
// Elements after the removed one are shifted down by one position with Array.Copy.
//
public void RemoveAt(int index) {
// Comparing as uint rejects negative indexes and indexes >= _size with a single test
// (a negative int becomes a very large uint).
if ((uint)index >= (uint)_size) {
ThrowHelper.ThrowArgumentOutOfRangeException();
}
Contract.EndContractBlock();
_size--;
// _size is already decremented here, so _size - index is the number of elements that
// followed the removed one; nothing needs to move when the last element was removed.
if (index < _size) {
Array.Copy(_items, index + 1, _items, index, _size - index);
}
// Reset the slot that is no longer part of the list to default(T).
_items[_size] = default(T);
// NOTE(review): _version is presumably what enumerators compare to detect modification -
// its readers are not visible in this excerpt.
_version++;
}
So I wonder why the ArrayTest requires twice the time that ListTest does. It seems to me that they are both doing pretty much the same thing, in fact I'd expect ArrayTest to be faster due to less overhead. Perhaps I have missed something obvious?
Update
Let me explain the example. The idea was to measure the performance under conditions of random deletes (while still retaining indexed access). The first part of each method initializes an array/list where each element in the array is 1/2 the index (integer division). The next part randomly removes 1/2 the elements. The last loop simply sums the values remaining in the array/list which is used as a checksum (to compare results). Profiling the code shows that in ArrayTest the Array.Copy uses the most time and in ListTest the lst.RemoveAt uses the most time.
I will attempt to replicate the behavior in a simpler example and update it here.

The problem seems to be with the line:
Array.Copy(arr, ix + 1, arr, ix, size - ix - 1);
in method ArrayTest. It should read
Array.Copy(arr, ix + 1, arr, ix, sz - ix - 1);
since the variable size is not changed when the array is resized, instead sz, the actual size, is calculated in each iteration.

There is a flaw in your test program. You've made it overly convoluted and thus hard to prove on paper. The only way to figure out what your code is doing is by debugging, hand-calculating everything.
So instead I made a simpler test.
// Compares "removing" the first element of an array (shifting the rest down with
// Array.Copy) against List<int>.RemoveAt(0), repeated until one element is left.
// Each elapsed time in milliseconds is emitted through Dump().
void Main()
{
    const int ElementCount = 200000;

    // The array allocation is inside the timed region; the list construction below is not.
    Stopwatch timer = Stopwatch.StartNew();
    var buffer = new int[ElementCount];
    for (int remaining = ElementCount - 1; remaining > 0; remaining--)
    {
        Array.Copy(buffer, 1, buffer, 0, remaining);
    }
    timer.Stop();
    timer.ElapsedMilliseconds.Dump();

    var items = new List<int>(buffer);
    timer.Restart();
    for (int remaining = ElementCount - 1; remaining > 0; remaining--)
    {
        items.RemoveAt(0);
    }
    timer.Stop();
    timer.ElapsedMilliseconds.Dump();
}
It simply "removes" the 0th element of an array, and a list. The results on my machine are:
3366
3442
3270
3242
3343
3385
That's 3 runs.

Array.Contains runs very slow, anyone shed some light?

I have done some benchmark regarding List.Contains, Array.Contains, IEnumerable.Contains, ICollection.Contains and IList.Contains.
The results are:
array pure 00:00:45.0052754 // 45 sec, slow
array as IList 00:00:02.7900305
array as IEnumerable 00:00:46.5871087 // 45 sec, slow
array as ICollection 00:00:02.7449889
list pure 00:00:01.9907563
list as IList 00:00:02.6626009
list as IEnumerable 00:00:02.9541950
list as ICollection 00:00:02.3341203
I found that it is very slow to call Contains directly on the array (which is equivalent to calling it through IEnumerable).
I also find it strange that the MSDN Array page doesn't list Contains in its extension methods section.
Sample code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
namespace arrayList
{
    /// <summary>
    /// Times 100,000,000 Contains(2) calls on a three-element array and on a list holding
    /// the same values, each accessed directly and through IList, IEnumerable and
    /// ICollection, printing the elapsed time of every variant.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            long iterations = 100000000;
            long[] numbers = new long[] { 1, 2, 3 };
            List<long> numberList = new List<long>(numbers);
            Stopwatch timer = new Stopwatch();

            // --- array variants ---
            timer.Start();
            for (long pass = 0; pass < iterations; pass++)
            {
                bool found = numbers.Contains(2);
            }
            timer.Stop();
            Console.WriteLine("array pure {0}", timer.Elapsed);

            timer.Restart();
            for (long pass = 0; pass < iterations; pass++)
            {
                bool found = (numbers as IList<long>).Contains(2);
            }
            timer.Stop();
            Console.WriteLine("array as IList {0}",timer.Elapsed);

            timer.Restart();
            for (long pass = 0; pass < iterations; pass++)
            {
                bool found = (numbers as IEnumerable<long>).Contains(2);
            }
            timer.Stop();
            Console.WriteLine("array as IEnumerable {0}",timer.Elapsed);

            timer.Restart();
            for (long pass = 0; pass < iterations; pass++)
            {
                bool found = (numbers as ICollection<long>).Contains(2);
            }
            timer.Stop();
            Console.WriteLine("array as ICollection {0}",timer.Elapsed);

            // --- list variants ---
            timer.Restart();
            for (long pass = 0; pass < iterations; pass++)
            {
                bool found = numberList.Contains(2);
            }
            timer.Stop();
            Console.WriteLine("list pure {0}", timer.Elapsed);

            timer.Restart();
            for (long pass = 0; pass < iterations; pass++)
            {
                bool found = (numberList as IList<long>).Contains(2);
            }
            timer.Stop();
            Console.WriteLine("list as IList {0}", timer.Elapsed);

            timer.Restart();
            for (long pass = 0; pass < iterations; pass++)
            {
                bool found = (numberList as IEnumerable<long>).Contains(2);
            }
            timer.Stop();
            Console.WriteLine("list as IEnumerable {0}", timer.Elapsed);

            timer.Restart();
            for (long pass = 0; pass < iterations; pass++)
            {
                bool found = (numberList as ICollection<long>).Contains(2);
            }
            timer.Stop();
            Console.WriteLine("list as ICollection {0}", timer.Elapsed);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}

The way you are timing these are not sufficient. You need significantly larger inputs to get times representative of the algorithms. Yes Contains() will be slower than a simple linear search (something you've omitted) but the different calls are not going to have the times as you've shown. You're likely to not see any variation between the calls to Contains() when cast to the different types as in all likelihood, we're calling the same implementation for all of them.
Try this code for size:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Diagnostics;
namespace ConsoleApplication1
{
    /// <summary>
    /// Times 1,000,000 searches for one value in a 10,000-element array and in a list
    /// with the same contents. Each is searched with a hand-written linear loop and with
    /// Contains called directly and through IList, IEnumerable and ICollection. All
    /// timings are printed after the last measurement.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            const int iterations = 1000000;
            const long target = 7192;

            long[] arr = Enumerable.Range(0, 10000).Select(n => (long)n).ToArray();
            List<long> list = arr.ToList();
            bool result;

            // ----- array measurements -----
            Stopwatch arrLinear = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = LinearSearchArr(arr, target);
            }
            arrLinear.Stop();

            Stopwatch arrPure = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                // actually Enumerable.Contains()
                result = arr.Contains(target);
            }
            arrPure.Stop();

            Stopwatch arrAsIList = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = ((IList<long>)arr).Contains(target);
            }
            arrAsIList.Stop();

            Stopwatch arrAsIEnumerable = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = ((IEnumerable<long>)arr).Contains(target);
            }
            arrAsIEnumerable.Stop();

            Stopwatch arrAsICollection = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = ((ICollection<long>)arr).Contains(target);
            }
            arrAsICollection.Stop();

            // ----- list measurements -----
            Stopwatch listLinear = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = LinearSearchList(list, target);
            }
            listLinear.Stop();

            Stopwatch listPure = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = list.Contains(target);
            }
            listPure.Stop();

            Stopwatch listAsIList = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = ((IList<long>)list).Contains(target);
            }
            listAsIList.Stop();

            Stopwatch listAsIEnumerable = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = ((IEnumerable<long>)list).Contains(target);
            }
            listAsIEnumerable.Stop();

            Stopwatch listAsICollection = Stopwatch.StartNew();
            for (int pass = 0; pass < iterations; pass++)
            {
                result = ((ICollection<long>)list).Contains(target);
            }
            listAsICollection.Stop();

            // ----- report -----
            Console.WriteLine("array linear {0} ({1})", arrLinear.Elapsed, arrLinear.ElapsedTicks);
            Console.WriteLine("array pure {0} ({1})", arrPure.Elapsed, arrPure.ElapsedTicks);
            Console.WriteLine("array as IList {0} ({1})", arrAsIList.Elapsed, arrAsIList.ElapsedTicks);
            Console.WriteLine("array as IEnumerable {0} ({1})", arrAsIEnumerable.Elapsed, arrAsIEnumerable.ElapsedTicks);
            Console.WriteLine("array as ICollection {0} ({1})", arrAsICollection.Elapsed, arrAsICollection.ElapsedTicks);
            Console.WriteLine("list linear {0} ({1})", listLinear.Elapsed, listLinear.ElapsedTicks);
            Console.WriteLine("list pure {0} ({1})", listPure.Elapsed, listPure.ElapsedTicks);
            Console.WriteLine("list as IList {0} ({1})", listAsIList.Elapsed, listAsIList.ElapsedTicks);
            Console.WriteLine("list as IEnumerable {0} ({1})", listAsIEnumerable.Elapsed, listAsIEnumerable.ElapsedTicks);
            Console.WriteLine("list as ICollection {0} ({1})", listAsICollection.Elapsed, listAsICollection.ElapsedTicks);
        }

        /// <summary>Returns true when <paramref name="target"/> occurs in the array (front-to-back scan).</summary>
        static bool LinearSearchArr(long[] arr, long target)
        {
            int index = 0;
            while (index < arr.Length)
            {
                if (arr[index] == target)
                {
                    return true;
                }
                index++;
            }
            return false;
        }

        /// <summary>Returns true when <paramref name="target"/> occurs in the list (front-to-back scan by index).</summary>
        static bool LinearSearchList(List<long> list, long target)
        {
            int index = 0;
            while (index < list.Count)
            {
                if (list[index] == target)
                {
                    return true;
                }
                index++;
            }
            return false;
        }
    }
}
Specs:
Windows 7 Professional 64-bit
Intel Core 2 Quad Q9550 # 2.83GHz
4x1GiB Corsair Dominator DDR2 1066 (PC2-8500)
Default .NET 4.0 Console App release build targeting x64:
array linear 00:00:07.7268891 (21379939)
array pure 00:00:12.1703848 (33674883)
array as IList 00:00:12.1764948 (33691789)
array as IEnumerable 00:00:12.5377771 (34691440)
array as ICollection 00:00:12.1827855 (33709195)
list linear 00:00:17.9288343 (49608242)
list pure 00:00:25.8427338 (71505630)
list as IList 00:00:25.8678260 (71575059)
list as IEnumerable 00:00:25.8500101 (71525763)
list as ICollection 00:00:25.8423424 (71504547)

Guess: IList/List use ICollection.Contains, which goes directly through the elements in the collection by index.
The Array and IEnumerable versions use IEnumerable.Contains, which requires creating an enumerator and running generic iteration code (like MoveNext calls).

Make sure you use the results of the Contains method somehow in your code so that it doesn't optimise that away. I am guessing in one situation it can use a hashtable, and in the others it has to do linear search. Either that or it is just not running your loop as it doesn't do anything.
Either way, who is ever going to write code that casts and then runs contains a million times...

How can I measure the performance of a HashTable in C#?

I am playing around with C# collections and I have decided to write a quick test to measure the performance of different collections.
My performance test goes like this:
// Times the same add/remove workload on two collections and computes how much slower
// (ratio > 1) or faster (ratio < 1) collection #2 is compared to collection #1.
int numOps= (put number here);
long numTicks1, numTicks2;
float ratio;
// Stopwatch is used instead of DateTime.Now: the wall clock only advances in coarse steps,
// which makes short runs (e.g. numOps = 500) look random.
System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
for(int i = 0; i < numOps; i++)
{
//add two elements to collection #1
//remove one element from collection #1
}
timer.Stop();
// Elapsed.Ticks is in 100 ns units, the same unit DateTime.Ticks uses.
numTicks1 = timer.Elapsed.Ticks;
timer.Restart();
for(int i = 0; i < numOps; i++)
{
//add two elements to collection #2
//remove one element from collection #2
}
timer.Stop();
numTicks2 = timer.Elapsed.Ticks;
ratio = (float)numTicks2/(float)numTicks1;
Then I compare the ratio value using different Collections and different values for numOps to see how they compare.
The problem is sometimes when I use a small enough number (numOps = 500), the test results between a Hashtable and List are sporadic (in other words it's a coin flip which one is faster). Can anyone explain why this is?
EDIT: Thanks everyone! Stopwatch works like a charm.

try taking a look at StopWatch class instead of using DateTime
this example straight out of MSDN
// Measures how long the work between Start() and Stop() takes and prints it as
// "RunTime hh:mm:ss.ff" (ff = hundredths of a second).
Stopwatch stopWatch = new Stopwatch();
stopWatch.Start();
Thread.Sleep(10000); //your for loop
stopWatch.Stop();
// Get the elapsed time as a TimeSpan value.
TimeSpan ts = stopWatch.Elapsed;
// Format and display the TimeSpan value.
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
    ts.Hours, ts.Minutes, ts.Seconds,
    ts.Milliseconds / 10);
// The label is concatenated in front. Passing it as a second argument would bind to the
// (format, arg) overload, which uses elapsedTime as the format string and drops "RunTime".
Console.WriteLine("RunTime " + elapsedTime);

I would start by trying out a higher resolution timer.
There are quite a few questions and answers about timers already on SO.
Here's one answer that has a list of options available to you.
In particular, check out System.Diagnostics.Stopwatch.

The proper way to time things diagnostically is to run the code many times iteratively (so that the total time is many multiples of the resolution of whatever timing mechanism you use) and then divide by the number of iterations to get an accurate time estimate.
Stopwatch returns times that are not quantized by 15 ms, so it's obviously more appropriate for timing events.

I used the following code to test the performance of a Dictionary using three different GetHash implementations:
/// <summary>
/// Measures Dictionary fill and read times for three key types that differ only in how
/// GetHashCode is implemented.
/// </summary>
class TestGetHash
{
    /// <summary>Key type that does not override GetHashCode.</summary>
    class First
    {
        int m_x;
    }

    /// <summary>Key type whose hash code is a per-instance sequence number.</summary>
    class Second
    {
        static int s_allocated = 0;
        int m_allocated;
        int m_x;

        public Second()
        {
            // Each new instance receives the next number.
            m_allocated = ++s_allocated;
        }

        public override int GetHashCode()
        {
            return m_allocated;
        }
    }

    /// <summary>Key type whose instances all share the hash code 0.</summary>
    class Third
    {
        int m_x;

        public override int GetHashCode()
        {
            return 0;
        }
    }

    /// <summary>Runs every key type with two object-count / iteration-count combinations.</summary>
    internal static void test()
    {
        testT<First>(100, 1000);
        testT<First>(1000, 100);
        testT<Second>(100, 1000);
        testT<Second>(1000, 100);
        testT<Third>(100, 100);
        testT<Third>(1000, 10);
    }

    /// <summary>
    /// Prints three timings for key type T: fill + read combined, fill only, and read only
    /// (the read-only phase reuses one dictionary filled outside the timed region).
    /// </summary>
    /// <param name="objects">Number of keys added to each dictionary.</param>
    /// <param name="iterations">How many times each phase is repeated.</param>
    static void testT<T>(int objects, int iterations)
        where T : new()
    {
        string typeName = typeof(T).Name;
        string report;

        // Phase 1: fill a fresh dictionary and read it back, per iteration.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        for (int round = 0; round < iterations; ++round)
        {
            Dictionary<T, object> filled = FillDictionary<T>(objects);
            ReadAllKeys(filled);
        }
        timer.Stop();
        report = string.Format("Stopwatch: {0} type, {1} objects, {2} iterations, {3} msec", typeName, objects, iterations, timer.ElapsedMilliseconds);
        System.Console.WriteLine(report);

        // Phase 2: fill only.
        timer = System.Diagnostics.Stopwatch.StartNew();
        for (int round = 0; round < iterations; ++round)
        {
            FillDictionary<T>(objects);
        }
        timer.Stop();
        report = string.Format("Stopwatch (fill dictionary): {0} type, {1} objects, {2} iterations, {3} msec", typeName, objects, iterations, timer.ElapsedMilliseconds);
        System.Console.WriteLine(report);

        // Phase 3: read only; the dictionary is built before the stopwatch starts.
        Dictionary<T, object> shared = FillDictionary<T>(objects);
        timer = System.Diagnostics.Stopwatch.StartNew();
        for (int round = 0; round < iterations; ++round)
        {
            ReadAllKeys(shared);
        }
        timer.Stop();
        report = string.Format("Stopwatch (read from dictionary): {0} type, {1} objects, {2} iterations, {3} msec", typeName, objects, iterations, timer.ElapsedMilliseconds);
        System.Console.WriteLine(report);
    }

    /// <summary>Creates a dictionary holding the requested number of new T keys, each mapped to null.</summary>
    static Dictionary<T, object> FillDictionary<T>(int objects)
        where T : new()
    {
        Dictionary<T, object> dictionary = new Dictionary<T, object>();
        for (int added = 0; added < objects; ++added)
        {
            dictionary.Add(new T(), null);
        }
        return dictionary;
    }

    /// <summary>Looks up every key of the dictionary through the indexer, 100 passes in a row.</summary>
    static void ReadAllKeys<T>(Dictionary<T, object> dictionary)
    {
        for (int pass = 0; pass < 100; ++pass)
        {
            foreach (T key in dictionary.Keys)
            {
                object value = dictionary[key];
            }
        }
    }
}

Develop Reference

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

Performance Benchmarking of Contains, Exists and Any - c#

Related

Stopwatch startup issues

Ways to Improve generic Dictionary performance

C# Array / List performance difference

Array.Contains runs very slow, anyone shed some light?

How can I measure the performance of a HashTable in C#?

Categories

Resources