As I was looking at the difference between Count and Count(), I decided to glance at the source code of Count(). I saw the following code snippet, and I wonder why the checked keyword is needed:
// Excerpt of the enumeration fallback: counts the items by walking the sequence one element at a time.
int num = 0;
using (IEnumerator<TSource> enumerator = source.GetEnumerator())
{
while (enumerator.MoveNext())
{
// checked makes this addition throw OverflowException instead of silently wrapping past int.MaxValue.
num = checked(num + 1);
}
return num;
}
The source code:
// System.Linq.Enumerable
using System.Collections;
using System.Collections.Generic;
/// <summary>
/// Returns the number of elements in <paramref name="source"/>.
/// </summary>
/// <remarks>
/// Sources that already expose a count (generic collection, list provider,
/// non-generic collection) are asked for it directly; only otherwise is the
/// sequence enumerated element by element.
/// </remarks>
public static int Count<TSource>(this IEnumerable<TSource> source)
{
    if (source == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
    }

    if (source is ICollection<TSource> genericCollection)
    {
        return genericCollection.Count;
    }

    if (source is IIListProvider<TSource> listProvider)
    {
        return listProvider.GetCount(onlyIfCheap: false);
    }

    if (source is ICollection legacyCollection)
    {
        return legacyCollection.Count;
    }

    int total = 0;
    using (IEnumerator<TSource> cursor = source.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            // Overflow-checked so a sequence longer than int.MaxValue throws
            // instead of wrapping around to a negative or otherwise wrong count.
            checked
            {
                total++;
            }
        }
    }

    return total;
}
Because it doesn't want to return a negative number in the (admittedly unlikely) event that there are more than 2-billion-odd items in the sequence - or a non-negative but just wrong number in the (even more unlikely) case that there are more than 4-billion-odd items in the sequence. checked will detect the overflow condition.
Related
I'm building a Spec Evaluator which must consider multiple possible OrderBy, as in the next example:
// Applies the ordering clauses in spec.Order to query, using one hard-coded branch per clause count.
if (spec.Order != null)
{
var count = spec.Order.Count;
if (count == 1)
{
query = query.OrderBy(spec.Order[0]);
}
else if (count == 2)
{
// The first clause goes through OrderBy; each further clause is chained with ThenBy.
query = query.OrderBy(spec.Order[0])
.ThenBy(spec.Order[1]);
}
else if (count == 3)
{
query = query.OrderBy(spec.Order[0])
.ThenBy(spec.Order[1])
.ThenBy(spec.Order[2]);
}
// And so on...
}
query is an IQueryable<T>, and spec.Order is a list of clauses: List<Expression<Func<T, object>>>.
I know that I can use an OrderBy with all the clauses passed as string. And I guess I can just project all the Order clauses to a new string comma-separated. But that solution doesn't seem clean.
Is there any other way to dynamically generate one new ThenBy for every item of the Order list, above 1?
You could use a for loop. Basically loop through all of the Order values, use OrderBy for the first one, and ThenBy for subsequent items. Since you've said that you're using IQueryable, I've modified this to use a temporary IOrderedQueryable<T> variable.
// Sort by the first clause with OrderBy, then chain every remaining clause with ThenBy.
// An empty clause list leaves query untouched.
if (spec.Order != null)
{
    int clauseCount = spec.Order.Count;
    if (clauseCount > 0)
    {
        IOrderedQueryable<T> sorted = query.OrderBy(spec.Order[0]);
        for (int clause = 1; clause < clauseCount; clause++)
        {
            sorted = sorted.ThenBy(spec.Order[clause]);
        }

        query = sorted;
    }
}
You could also approach it like this, although I'm not sure how the performance differs between the two methods, if it does at all:
// Same idea without a temporary variable: OrderBy for the first clause, ThenBy for the rest.
if (spec.Order != null)
{
    var count = spec.Order.Count;
    for (int i = 0; i < count; ++i)
    {
        // Decide by position rather than by `query is IOrderedQueryable<T>`: that test looks at
        // the runtime type, and many providers hand back objects implementing IOrderedQueryable<T>
        // even for queries that have not been ordered, which would route the first clause to ThenBy.
        // After the first pass query is always the result of OrderBy/ThenBy, so the cast is safe.
        query = i == 0
            ? query.OrderBy(spec.Order[i])
            : ((IOrderedQueryable<T>)query).ThenBy(spec.Order[i]);
    }
}
In MyList List<Person> there may be a Person with its Name property set to "ComTruise". I need the index of first occurrence of "ComTruise" in MyList, but not the entire Person element.
What I'm doing now is:
string myName = "ComTruise";
// Count the people that come BEFORE the first match; that count is the match's index.
// (SkipWhile would instead count the match and everything after it.)
// When nobody matches, the result equals MyList.Count rather than -1.
int thatIndex = MyList.TakeWhile(p => p.Name != myName).Count();
If the list is very large, is there a more optimal way to get the index?
You could use FindIndex
string myName = "ComTruise";
int myIndex = MyList.FindIndex(p => p.Name == myName);
Note: FindIndex returns -1 if no item matching the conditions defined by the supplied predicate can be found in the list.
As it's an ObservableCollection, you can try this
int index = MyList.IndexOf(MyList.Where(p => p.Name == "ComTruise").FirstOrDefault());
It will return -1 if "ComTruise" doesn't exist in your collection.
As mentioned in the comments, this performs two searches. You can optimize it with a for loop.
// Single pass over the collection; stops at the first person whose name matches.
// index stays -1 when there is no match.
int index = -1;
for (int position = 0; index < 0 && position < MyList.Count; position++)
{
    //case insensitive search
    bool isMatch = String.Equals(MyList[position].Name, "ComTruise", StringComparison.OrdinalIgnoreCase);
    if (isMatch)
    {
        index = position;
    }
}
It might make sense to write a simple extension method that does this:
/// <summary>
/// Returns the zero-based position of the first element that satisfies
/// <paramref name="predicate"/>, or -1 when no element does.
/// </summary>
/// <param name="collection">Sequence to search; enumerated at most once and only up to the first match.</param>
/// <param name="predicate">Test applied to each element in order.</param>
/// <returns>The index of the first match, or -1.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="collection"/> or <paramref name="predicate"/> is null.
/// </exception>
public static int FindIndex<T>(
    this IEnumerable<T> collection, Func<T, bool> predicate)
{
    // Fail with a named argument instead of a NullReferenceException from inside the loop.
    if (collection == null)
    {
        throw new ArgumentNullException(nameof(collection));
    }

    if (predicate == null)
    {
        throw new ArgumentNullException(nameof(predicate));
    }

    int i = 0;
    foreach (var item in collection)
    {
        if (predicate(item))
        {
            return i;
        }

        i++;
    }

    return -1;
}
// Find the first person with the wanted name, then ask the list for that object's position.
// The lambda parameter gets its own name: reusing the local's name inside its own initializer does not compile.
var match = MyList.FirstOrDefault(person => person.Name == myName);
int thatIndex = -1;
if (match != null)
{
    thatIndex = MyList.IndexOf(match);
}
// Test the index, not the person object, against the "not found" sentinel.
if (thatIndex != -1) ...
Here is some dummy code that illustrates what I want to do:
// Illustrative only: tries to remove the multiples of 5 from list1 while enumerating it with foreach.
List<int> list1 = new List<int>();
//Code to fill the list
foreach(int number in list1)
{
if(number%5==0)
{
// Removing from the list that is being enumerated is the failure being asked about (see the question text below).
list1.Remove(number);
}
}
Assuming the test actually removes an int, it will throw an error. Is there a way of doing this in a foreach, or do I have to convert it to a for loop?
You can't remove items from a collection that you are iterating thru with a for each.
I would do this...
list1 = list1.Where(l => l % 5 != 0).ToList();
The RemoveAll() method comes closest to what you want, I think:
list1.RemoveAll(i => i%5 == 0);
Actually, if you want to remove the items from the list in place, as you state in the original post, you could do:
List<int> list1 = new List<int>();
//Code to fill the list
// An index-based loop has no enumerator to invalidate, so removing in place is allowed.
for (var n = 0; n < list1.Count; n++)
{
    if (list1[n] % 5 == 0)
    {
        // RemoveAt deletes exactly this slot (Remove would search for the value from the start).
        // Later items shift left by one, so step back to re-examine the item now sitting at n.
        list1.RemoveAt(n--);
    }
}
Edited to Add
The reason why you can't change a list while in a foreach loop is as follows:
// Enumerator over a List<T>. It records the list's _version when created and throws
// InvalidOperationException from MoveNext/Reset once that value no longer matches.
// NOTE(review): presumably List<T> changes _version on every mutation (Add, Remove, ...) - not shown in this excerpt.
[Serializable()]
public struct Enumerator : IEnumerator<T>, System.Collections.IEnumerator
{
private List<T> list;
// Position of the next element MoveNext will read.
private int index;
// Copy of list._version taken in the constructor.
private int version;
private T current;
internal Enumerator(List<T> list) {
this.list = list;
index = 0;
version = list._version;
current = default(T);
}
// Nothing to release.
public void Dispose() {
}
// Fast path: the list is unchanged and there is another element. Everything else goes to MoveNextRare.
public bool MoveNext() {
List<T> localList = list;
// The uint casts fold the bounds check into a single comparison.
if (version == localList._version && ((uint)index < (uint)localList._size))
{
current = localList._items[index];
index++;
return true;
}
return MoveNextRare();
}
// Slow path: either the list changed since construction (throw) or the end was reached (return false).
private bool MoveNextRare()
{
if (version != list._version) {
ThrowHelper.ThrowInvalidOperationException(ExceptionResource.InvalidOperation_EnumFailedVersion);
}
// _size + 1 marks "past the end"; the non-generic Current getter checks for this value.
index = list._size + 1;
current = default(T);
return false;
}
// Last element read by MoveNext; default(T) before the first call and after the end.
public T Current {
get {
return current;
}
}
// Non-generic Current: unlike the generic one, it throws when positioned before the first or after the last element.
Object System.Collections.IEnumerator.Current {
get {
if( index == 0 || index == list._size + 1) {
ThrowHelper.ThrowInvalidOperationException(ExceptionResource.InvalidOperation_EnumOpCantHappen);
}
return Current;
}
}
// Rewinds to the start; refuses if the list changed since construction.
void System.Collections.IEnumerator.Reset() {
if (version != list._version) {
ThrowHelper.ThrowInvalidOperationException(ExceptionResource.InvalidOperation_EnumFailedVersion);
}
index = 0;
current = default(T);
}
}
As far as I know, a collection cannot be modified while in a foreach loop. You need to change it to a for loop. Another way you can accomplish that is using LINQ.
You can't do it in-situ using foreach, because it invalidates the enumerator.
Either take a copy of the list and iterate over that, or use a different type of loop such as a for() loop.
You cannot modify the collection you are enumerating through using foreach. What I often do is use a for loop and go backwards through the collection; that way you can safely remove items, since removing an item only shifts the items after it, which you have already visited.
I'm pretty happy with the following method. It takes an enumerable and a list of sorted, disjoint ranges and skips items not in the ranges. If the ranges are null, we just walk every item. The enumerable and the list of ranges are both possibly large. We want this method to be as high performance as possible.
Can someone think of a more elegant piece of code? I'm primarily interested in C# implementations, but if someone has a three-character APL implementation, that's cool too.
// Lazily yields the items of source whose zero-based position falls inside one of the ranges
// (First..Second, both inclusive). A null ranges list yields every item.
// The question text states the ranges are sorted and disjoint; the code relies on that and does not check it.
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
// Because this is an iterator, the assert only runs once enumeration starts.
Debug.Assert(ranges == null || ranges.Count > 0);
// Zero-based position of the item currently being looked at.
int currentItem = 0;
Pair<int, int> currentRange = new Pair<int, int>();
int currentRangeIndex = -1;
// True while skipping items that lie before the start of currentRange.
bool betweenRanges = false;
if (ranges != null)
{
currentRange = ranges[0];
currentRangeIndex = 0;
// If the first range starts at position 0 there is no leading gap to skip.
betweenRanges = currentRange.First > 0;
}
foreach (T item in source)
{
if (ranges != null) {
if (betweenRanges) {
// Reaching the range start ends the gap; this item falls through to the yield below.
if (currentItem == currentRange.First)
betweenRanges = false;
else {
currentItem++;
continue;
}
}
}
yield return item;
if (ranges != null) {
// The item just yielded was the last one of the current range.
if (currentItem == currentRange.Second) {
if (currentRangeIndex == ranges.Count - 1)
break; // We just visited the last item in the ranges
currentRangeIndex = currentRangeIndex + 1;
currentRange = ranges[currentRangeIndex];
betweenRanges = true;
}
}
currentItem++;
}
}
Maybe use linq on your source something like:
/// <summary>
/// Returns the items of <paramref name="source"/> whose zero-based position lies inside any of
/// the <paramref name="ranges"/> (First..Second, both ends inclusive).
/// </summary>
/// <param name="source">Sequence to filter.</param>
/// <param name="ranges">Index ranges to keep; null means "keep every item".</param>
/// <returns>A lazily evaluated filtered sequence; never null for a non-null source.</returns>
/// <remarks>
/// Every item is tested against every range, so the cost is proportional to
/// (number of items) x (number of ranges), and the source is always read to its end.
/// </remarks>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    // No ranges means no filtering: hand back the source instead of null.
    if (ranges == null)
    {
        return source;
    }

    // Inclusive comparisons so the first and last index of each range are kept.
    return source.Where((item, index) =>
        ranges.Any(range => range.First <= index && index <= range.Second));
}
I don't have my Windows PC in front of me and I'm not sure I understood your code correctly, but I tried to understand your text instead and the code above could work.... or something like it.
UPDATED: Regarding the performance issue I would recommend you to test the performance with some simple test and time both of the functions.
You could copy the source list to an array and then for each range, you can block copy from your new source array to a target array in the proper location. If you can get your source collection passed in as an array, that would make this an even better approach. If you do have to do the initial copy, it is O(N) for that operation plus O(M) where M is the total number of items in the final array. So it ends up coming out to O(N) in either case.
Here's my take. I find it easier to understand, if not more elegant.
/// <summary>
/// Returns the items of <paramref name="source"/> whose zero-based position lies inside one of
/// the <paramref name="ranges"/> (Item1..Item2, both ends inclusive).
/// </summary>
/// <param name="source">Sequence to walk; read at most once and never past the last range.</param>
/// <param name="ranges">Sorted, disjoint index ranges; null returns <paramref name="source"/> unchanged.</param>
/// <returns>The source itself when ranges is null, otherwise a lazily evaluated filtered sequence.</returns>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Tuple<int, int>> ranges)
{
    if (ranges == null)
    {
        return source;
    }

    Debug.Assert(ranges.Count > 0);
    return WalkRangesInternal(source, ranges);
}

// Iterator half of WalkRanges: advances one source enumerator across the gaps and ranges in order.
static IEnumerable<T> WalkRangesInternal<T>(IEnumerable<T> source, List<Tuple<int, int>> ranges)
{
    // Number of source items consumed so far, i.e. the index of the next item MoveNext will produce.
    int currentItem = 0;
    using (var sourceEnum = source.GetEnumerator())
    {
        foreach (var range in ranges)
        {
            // skip over every item in the gap between ranges
            while (currentItem < range.Item1)
            {
                // Source exhausted: stop entirely instead of visiting the remaining ranges.
                if (!sourceEnum.MoveNext())
                {
                    yield break;
                }

                currentItem++;
            }

            // yield all the elements in the range
            while (currentItem <= range.Item2)
            {
                if (!sourceEnum.MoveNext())
                {
                    yield break;
                }

                yield return sourceEnum.Current;
                currentItem++;
            }
        }
    }
}
How about this (untested)? Should have pretty similar performance characteristics (pure streaming, no unnecessary buffering, quick exit), but is easier to follow, IMO:
/// <summary>
/// Returns the items of <paramref name="source"/> located at the positions covered by
/// <paramref name="ranges"/>; a null <paramref name="ranges"/> returns the source unchanged.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source,
    List<Pair<int, int>> ranges)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    // Per the spec, no ranges means "walk everything".
    if (ranges == null)
    {
        return source;
    }

    return RangeIterate(source, ranges);
}

// Streams the source alongside a lazily generated sequence of wanted positions.
private static IEnumerable<T> RangeIterate<T>(IEnumerable<T> source,
    List<Pair<int, int>> ranges)
{
    // Lazily expands every range into the individual positions it covers; nothing is buffered.
    // Negative range starts are clamped to 0.
    IEnumerable<int> wantedPositions = ranges.SelectMany(range =>
    {
        int start = Math.Max(0, range.First);
        return Enumerable.Range(start, range.Second - start + 1);
    });

    using (IEnumerator<int> wanted = wantedPositions.GetEnumerator())
    {
        // No wanted positions at all: finish without touching the source.
        if (!wanted.MoveNext())
        {
            yield break;
        }

        int position = 0;
        foreach (T element in source)
        {
            if (position == wanted.Current)
            {
                yield return element;

                // Stop as soon as the last wanted position has been delivered.
                if (!wanted.MoveNext())
                {
                    yield break;
                }
            }

            position++;
        }
    }
}
If you don't mind buffering indices, you can do something like this, which is even clearer, IMO:
/// <summary>
/// Returns the items of <paramref name="source"/> located at the positions covered by
/// <paramref name="ranges"/>, buffering all wanted positions in a set up front.
/// </summary>
/// <returns>
/// The source itself when <paramref name="ranges"/> is null, an empty sequence when it has no
/// entries, otherwise a lazily evaluated filtered sequence.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source,
    List<Pair<int, int>> ranges)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    if (ranges == null)
    {
        return source;
    }

    // Nothing to select: skip all the work below.
    if (ranges.Count == 0)
    {
        return Enumerable.Empty<T>();
    }

    // Expand every range into its individual positions (negative starts clamped to 0).
    var wantedPositions = new HashSet<int>();
    foreach (var range in ranges)
    {
        int start = Math.Max(0, range.First);
        foreach (int position in Enumerable.Range(start, range.Second - start + 1))
        {
            wantedPositions.Add(position);
        }
    }

    // Never read the source beyond the end of the last range.
    int itemsToRead = ranges[ranges.Count - 1].Second + 1;
    return source.Take(itemsToRead)
        .Where((item, index) => wantedPositions.Contains(index));
}
You could iterate over the collection manually to prevent the enumerator from getting the current item when it will be skipped:
/// <summary>
/// Lazily yields the items of <paramref name="source"/> whose zero-based position falls inside
/// one of the <paramref name="ranges"/> (First..Second, inclusive); a null list yields everything.
/// </summary>
/// <remarks>
/// Drives the enumerator by hand so that <c>Current</c> is only read for items that are yielded.
/// </remarks>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    Debug.Assert(ranges == null || ranges.Count > 0);

    bool filtering = ranges != null;
    Pair<int, int> activeRange = new Pair<int, int>();
    int activeRangeIndex = -1;
    bool inGap = false;
    if (filtering)
    {
        activeRange = ranges[0];
        activeRangeIndex = 0;
        // A first range starting at position 0 has no leading gap.
        inGap = activeRange.First > 0;
    }

    using (IEnumerator<T> cursor = source.GetEnumerator())
    {
        for (int position = 0; cursor.MoveNext(); position++)
        {
            if (filtering && inGap)
            {
                // Still before the start of the active range: skip without reading Current.
                if (position != activeRange.First)
                {
                    continue;
                }

                inGap = false;
            }

            yield return cursor.Current;

            if (filtering && position == activeRange.Second)
            {
                // We just visited the last item in the ranges
                if (activeRangeIndex == ranges.Count - 1)
                {
                    yield break;
                }

                activeRangeIndex++;
                activeRange = ranges[activeRangeIndex];
                inGap = true;
            }
        }
    }
}
My second try; this one takes the ordering of the ranges into account. I haven't tried it yet, but I think it works :). You could probably extract some of the code into smaller functions to make it more readable.
/// <summary>
/// Lazily yields the items of <paramref name="source"/> whose zero-based position falls inside
/// one of the <paramref name="ranges"/> (First..Second, both ends inclusive).
/// </summary>
/// <param name="source">Sequence to walk.</param>
/// <param name="ranges">Index ranges sorted in ascending order; null yields every item.</param>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    int currentIndex = 0;
    int currentRangeIndex = 0;
    foreach (var item in source)
    {
        if (ranges != null)
        {
            // Move past every range that ends before the current position.
            while (currentRangeIndex < ranges.Count
                   && currentIndex > ranges[currentRangeIndex].Second)
            {
                currentRangeIndex++;
            }

            // We've passed the last range: nothing further can match.
            if (currentRangeIndex == ranges.Count)
            {
                yield break;
            }

            // Not past the range's end (checked above), so being in range only needs the start test.
            if (currentIndex < ranges[currentRangeIndex].First)
            {
                currentIndex++; // always increase the index when advancing through source
                continue;
            }
        }

        yield return item;
        currentIndex++;
    }
}
Given n enumerables of the same type that return distinct elements in ascending order, for example:
IEnumerable<char> s1 = "adhjlstxyz";
IEnumerable<char> s2 = "bdeijmnpsz";
IEnumerable<char> s3 = "dejlnopsvw";
I want to efficiently find all values that are elements of all enumerables:
IEnumerable<char> sx = Intersect(new[] { s1, s2, s3 });
Debug.Assert(sx.SequenceEqual("djs"));
"Efficiently" here means that
the input enumerables should each be enumerated only once,
the elements of the input enumerables should be retrieved only when needed, and
the algorithm should not recursively enumerate its own output.
I need some hints how to approach a solution.
Here is my (naive) attempt so far:
/// <summary>
/// Intersects all sequences in <paramref name="enums"/> by recursively applying
/// <c>Enumerable.Intersect</c> (hash-set based; does not exploit sorted input).
/// </summary>
/// <param name="enums">Sequences to intersect.</param>
/// <returns>
/// The distinct elements of the first sequence that occur in every other sequence, in
/// first-sequence order; empty for no inputs; the sequence itself for a single input.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="enums"/> is null.</exception>
static IEnumerable<T> Intersect<T>(IEnumerable<T>[] enums)
{
    if (enums == null)
    {
        throw new ArgumentNullException(nameof(enums));
    }

    // Zero or one input would otherwise index past the end of the array.
    if (enums.Length == 0)
    {
        return Enumerable.Empty<T>();
    }

    if (enums.Length == 1)
    {
        return enums[0];
    }

    return enums[0].Intersect(
        enums.Length == 2 ? enums[1] : Intersect(enums.Skip(1).ToArray()));
}
Enumerable.Intersect collects the first enumerable into a HashSet, then enumerates the second enumerable and yields all matching elements.
Intersect then recursively intersects the result with the next enumerable.
This obviously isn't very efficient (it doesn't meet the constraints). And it doesn't exploit the fact that the elements are sorted at all.
Here is my attempt to intersect two enumerables. Maybe it can be generalized for n enumerables?
/// <summary>
/// Intersects two sequences that are each sorted in ascending order (default comparer),
/// advancing both in lock-step so every input is read once and only as far as needed.
/// </summary>
static IEnumerable<T> Intersect<T>(IEnumerable<T> first, IEnumerable<T> second)
{
    using (var lhs = first.GetEnumerator())
    using (var rhs = second.GetEnumerator())
    {
        bool lhsAlive = lhs.MoveNext();
        bool rhsAlive = rhs.MoveNext();
        var ordering = Comparer<T>.Default;

        // Stop as soon as either side runs out: nothing further can be common.
        while (lhsAlive && rhsAlive)
        {
            int order = ordering.Compare(lhs.Current, rhs.Current);
            if (order < 0)
            {
                // Left value is smaller, so it cannot occur on the right any more.
                lhsAlive = lhs.MoveNext();
            }
            else if (order > 0)
            {
                // Right value is smaller, so it cannot occur on the left any more.
                rhsAlive = rhs.MoveNext();
            }
            else
            {
                yield return lhs.Current;
                lhsAlive = lhs.MoveNext();
                rhsAlive = rhs.MoveNext();
            }
        }
    }
}
OK; more complex answer:
/// <summary>
/// Intersects any number of ascending-sorted sequences using the default comparer.
/// </summary>
public static IEnumerable<T> Intersect<T>(params IEnumerable<T>[] enums)
{
    return Intersect<T>(null, enums);
}

/// <summary>
/// Intersects any number of sequences that are sorted ascending according to
/// <paramref name="comparer"/> (null selects the default comparer).
/// </summary>
/// <returns>
/// Empty for no inputs, the input itself for a single input, otherwise a lazily
/// evaluated sequence of the values present in every input.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="enums"/> is null.</exception>
public static IEnumerable<T> Intersect<T>(IComparer<T> comparer, params IEnumerable<T>[] enums)
{
    if (enums == null)
    {
        throw new ArgumentNullException("enums");
    }

    switch (enums.Length)
    {
        case 0:
            return Enumerable.Empty<T>();
        case 1:
            return enums[0];
        default:
            return IntersectImpl(comparer ?? Comparer<T>.Default, enums);
    }
}

/// <summary>
/// Iterator behind Intersect: takes each value of the first sequence and advances every other
/// sequence up to it, yielding the value only if all of them land exactly on it.
/// </summary>
public static IEnumerable<T> IntersectImpl<T>(IComparer<T> comparer, IEnumerable<T>[] enums)
{
    var cursors = new IEnumerator<T>[enums.Length];
    try
    {
        // Open every input and step onto its first item; an empty input means an empty result.
        for (int slot = 0; slot < enums.Length; slot++)
        {
            cursors[slot] = enums[slot].GetEnumerator();
            if (!cursors[slot].MoveNext())
            {
                yield break;
            }
        }

        IEnumerator<T> lead = cursors[0];
        bool havePrevious = false;
        T previous = default(T);
        do
        {
            T candidate = lead.Current;

            // Repeated value in the first input: already handled.
            if (havePrevious && comparer.Compare(candidate, previous) == 0)
            {
                continue;
            }

            bool presentEverywhere = true;
            for (int slot = 1; slot < cursors.Length; slot++)
            {
                IEnumerator<T> follower = cursors[slot];

                // Catch this input up to the candidate; if it runs dry, nothing more can match.
                while (comparer.Compare(follower.Current, candidate) < 0)
                {
                    if (!follower.MoveNext())
                    {
                        yield break;
                    }
                }

                // This input has moved past the candidate, so the candidate is missing from it.
                if (comparer.Compare(follower.Current, candidate) > 0)
                {
                    presentEverywhere = false;
                    break;
                }
            }

            if (presentEverywhere)
            {
                yield return candidate;
            }

            havePrevious = true;
            previous = candidate;
        }
        while (lead.MoveNext());
    }
    finally
    {
        // Best-effort cleanup: a failing Dispose must not prevent disposing the remaining inputs.
        foreach (IEnumerator<T> cursor in cursors)
        {
            if (cursor != null)
            {
                try { cursor.Dispose(); }
                catch { }
            }
        }
    }
}
You can use LINQ:
/// <summary>
/// Folds <c>Enumerable.Intersect</c> over all sequences in <paramref name="enums"/>.
/// </summary>
/// <returns>
/// An empty sequence when there are no inputs, the first input itself when it is the only
/// one, otherwise the chained intersection of all inputs.
/// </returns>
public static IEnumerable<T> Intersect<T>(IEnumerable<IEnumerable<T>> enums)
{
    IEnumerable<T> combined = null;
    bool seeded = false;
    foreach (IEnumerable<T> sequence in enums)
    {
        // The first input seeds the result; every later one narrows it.
        combined = seeded ? combined.Intersect(sequence) : sequence;
        seeded = true;
    }

    return seeded ? combined : Enumerable.Empty<T>();
}
This would be simple, although it does build the hash-set multiple times; advancing all n at once (to take advantage of sorted) would be hard, but you could also build a single hash-set and remove missing things?