Related
I'm out of ideas on this one. I first tried it myself, then copied solutions from SO and Google. Those worked for all test cases except one: I still haven't found a recursive algorithm that is fast enough for that particular test case in my assignment :/
In any case, why this:
/// <summary>
/// Validates <paramref name="array"/> and returns its largest element by delegating to FindMaxRec.
/// </summary>
/// <param name="array">The values to search.</param>
/// <returns>The result of FindMaxRec over all <c>array.Length</c> elements.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="array"/> is empty.</exception>
public static int FindMaximum(int[] array)
{
if (array is null)
{
throw new ArgumentNullException(nameof(array));
}
if (array.Length == 0)
{
throw new ArgumentException(null);
}
// Start the recursion with the full element count.
return FindMaxRec(array, array.Length);
}
/// <summary>
/// Returns the largest of the first <paramref name="n"/> elements of <paramref name="arr"/>.
/// </summary>
/// <param name="arr">The values to search.</param>
/// <param name="n">How many leading elements to consider; the recursion bottoms out at 1.</param>
/// <returns>The maximum of <c>arr[0]</c> through <c>arr[n - 1]</c>.</returns>
public static int FindMaxRec(int[] arr, int n)
{
// Base case: a single element is its own maximum.
if (n == 1)
{
return arr[0];
}
// Each call peels off exactly one element, so the call depth equals n.
// NOTE(review): for very large arrays this depth is likely what exhausts the call stack.
return Math.Max(arr[n - 1], FindMaxRec(arr, n - 1));
}
doesn't work with this TestCase?:
// Checks FindMaximum against a precomputed expected value within a five second limit.
// this.array and this.max are fixture members declared outside this snippet
// (presumably a large input and its known maximum; verify against the fixture).
[Test]
[Order(0)]
[Timeout(5_000)]
public void FindMaximum_TestForLargeArray()
{
int expected = this.max;
int actual = FindMaximum(this.array);
Assert.AreEqual(expected, actual);
}
EDIT 1:
This works fine though, but I need recursive:
/// <summary>
/// Finds the largest element of <paramref name="array"/> with a single forward scan.
/// </summary>
/// <param name="array">The values to search.</param>
/// <returns>The largest element.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="array"/> is empty.</exception>
public static int FindMaximum(int[] array)
{
    if (array is null)
    {
        throw new ArgumentNullException(nameof(array));
    }

    if (array.Length == 0)
    {
        throw new ArgumentException(null);
    }

    // Seed with the smallest int so the first element always replaces it or ties.
    int best = int.MinValue;
    foreach (int candidate in array)
    {
        best = candidate > best ? candidate : best;
    }

    return best;
}
You can try splitting array in two:
/// <summary>
/// Validates <paramref name="array"/> and returns its largest element using the
/// range-splitting FindMaxRec helper.
/// </summary>
/// <param name="array">The values to search.</param>
/// <returns>The largest element.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="array"/> is empty.</exception>
public static int FindMaximum(int[] array)
{
    if (array is null)
    {
        throw new ArgumentNullException(nameof(array));
    }

    // An array length is never negative, so this is the same test as "<= 0".
    if (array.Length == 0)
    {
        throw new ArgumentException("Empty array is not allowed.", nameof(array));
    }

    int lastIndex = array.Length - 1;
    return FindMaxRec(array, 0, lastIndex);
}
/// <summary>
/// Returns the largest element of <paramref name="array"/> between the inclusive indexes
/// <paramref name="from"/> and <paramref name="to"/> by recursively halving the range.
/// </summary>
/// <param name="array">The values to search.</param>
/// <param name="from">Inclusive start index.</param>
/// <param name="to">Inclusive end index; must not be less than <paramref name="from"/>.</param>
/// <returns>The maximum of the elements in the range.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="to"/> is less than <paramref name="from"/>.</exception>
private static int FindMaxRec(int[] array, int from, int to)
{
    if (to < from)
    {
        throw new ArgumentOutOfRangeException(nameof(to));
    }

    // One or two elements left: compare them directly.
    // Written as a difference so that "from + 1" cannot overflow.
    if (to - from <= 1)
    {
        return Math.Max(array[from], array[to]);
    }

    // from + (to - from) / 2 gives the same midpoint as (from + to) / 2 but cannot
    // overflow when both indexes are large; it is also computed only once.
    int middle = from + (to - from) / 2;

    // Halving keeps the recursion depth logarithmic in the range length.
    return Math.Max(FindMaxRec(array, from, middle),
                    FindMaxRec(array, middle + 1, to));
}
Demo:
// Fill ten million slots from a fixed-seed generator so that every run sees the same data.
var generator = new Random(123);
int[] data = new int[10_000_000];
for (int slot = 0; slot < data.Length; slot++)
{
    data[slot] = generator.Next(1_000_000_000);
}

// Time only the search itself.
var sw = new Stopwatch();
sw.Start();
int max = FindMaximum(data);
sw.Stop();

Console.WriteLine($"max = {max}");
Console.WriteLine($"time = {sw.ElapsedMilliseconds}");
Outcome:
max = 999999635
time = 100
An easy way to turn a simple linear algorithm into a recursive one is to make use of the enumerator of the array.
/// <summary>
/// Returns the largest element of <paramref name="values"/> by walking a typed enumerator
/// recursively. An empty array yields <see cref="int.MinValue"/>, the seed value.
/// </summary>
/// <param name="values">The values to search.</param>
/// <returns>The largest element, or <see cref="int.MinValue"/> for an empty array.</returns>
public static int FindMax(int[] values)
{
    // Calling GetEnumerator() directly on an array returns the non-generic IEnumerator,
    // which is neither disposable nor accepted by FindMaxRecursively. Going through
    // IEnumerable<int> yields the generic, disposable enumerator.
    using IEnumerator<int> enumerator = ((IEnumerable<int>)values).GetEnumerator();
    return FindMaxRecursively(enumerator, int.MinValue);
}
/// <summary>
/// Consumes the remaining items of <paramref name="enumerator"/> one per call and returns
/// the largest value seen, starting from <paramref name="currentMax"/>.
/// </summary>
/// <typeparam name="T">A comparable element type.</typeparam>
/// <param name="enumerator">The enumerator to advance; it is left exhausted.</param>
/// <param name="currentMax">The best value found so far.</param>
/// <returns>The larger of <paramref name="currentMax"/> and every remaining item.</returns>
private static T FindMaxRecursively<T>(IEnumerator<T> enumerator, T currentMax) where T : IComparable
{
    if (enumerator.MoveNext())
    {
        T candidate = enumerator.Current;

        // Only a strictly greater item replaces the running maximum.
        T best = candidate.CompareTo(currentMax) > 0 ? candidate : currentMax;
        return FindMaxRecursively(enumerator, best);
    }

    // Nothing left to inspect.
    return currentMax;
}
This passes your test case and uses recursion.
Edit: Here is a more beginner friendly version of the above, with comments to explain what it is doing:
/// <summary>
/// Returns the largest value in <paramref name="values"/>, or <see cref="int.MinValue"/>
/// when the sequence is empty.
/// </summary>
/// <param name="values">The sequence to search.</param>
/// <returns>The largest value found.</returns>
public static int FindMax(IEnumerable<int> values)
{
    // The using block disposes the enumerator as soon as the search has returned.
    using (IEnumerator<int> cursor = values.GetEnumerator())
    {
        return FindMaxRecursively(cursor, int.MinValue);
    }
}
/// <summary>
/// Advances <paramref name="enumerator"/> one item per call and returns the largest value
/// seen, starting from <paramref name="currentMax"/>.
/// </summary>
/// <param name="enumerator">The enumerator to advance; it is left exhausted.</param>
/// <param name="currentMax">The largest value found so far.</param>
/// <returns>The larger of <paramref name="currentMax"/> and every remaining item.</returns>
private static int FindMaxRecursively(IEnumerator<int> enumerator, int currentMax)
{
    // MoveNext returns false once there are no more items: the running maximum is the answer.
    bool hasItem = enumerator.MoveNext();
    if (!hasItem)
    {
        return currentMax;
    }

    // Keep whichever is larger, then carry it into the next step.
    int item = enumerator.Current;
    int best = item > currentMax ? item : currentMax;
    return FindMaxRecursively(enumerator, best);
}
Something that might help understand this is that the IEnumerator is what enables foreach loops. Under the hood, foreach loops are just repeatedly calling MoveNext on an item that has an IEnumerator. Here is some more info on that topic.
public static int findMax(int[] a, int index) {
if (index > 0) {
return Math.max(a[index], findMax(a, index-1))
} else {
return a[0];
}
}
Is there any way I can separate a List<SomeObject> into several separate lists of SomeObject, using the item index as the delimiter of each split?
Let me exemplify:
I have a List<SomeObject> and I need a List<List<SomeObject>> or List<SomeObject>[], so that each of these resulting lists will contain a group of 3 items of the original list (sequentially).
eg.:
Original List: [a, g, e, w, p, s, q, f, x, y, i, m, c]
Resulting lists: [a, g, e], [w, p, s], [q, f, x], [y, i, m], [c]
I'd also need the resulting lists size to be a parameter of this function.
Try the following code.
/// <summary>
/// Splits <paramref name="source"/> into consecutive sub-lists of at most
/// <paramref name="chunkSize"/> items; the last sub-list holds whatever remains.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="source">The items to split.</param>
/// <param name="chunkSize">Maximum size of each sub-list. Defaults to 3, the original fixed size.</param>
/// <returns>The sub-lists in source order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is less than 1.</exception>
public static List<List<T>> Split<T>(IList<T> source, int chunkSize = 3)
{
    if (source is null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (chunkSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be at least 1.");
    }

    return source
        // Pair every item with its position ...
        .Select((x, i) => new { Index = i, Value = x })
        // ... so that integer division groups positions 0..n-1, n..2n-1, and so on.
        .GroupBy(x => x.Index / chunkSize)
        .Select(x => x.Select(v => v.Value).ToList())
        .ToList();
}
The idea is to first group the elements by index. Integer-dividing each index by three puts the elements into groups of 3. Each group is then converted to a list, and the resulting IEnumerable of lists is converted to a List of Lists.
I just wrote this, and I think it's a little more elegant than the other proposed solutions:
/// <summary>
/// Lazily cuts a sequence into consecutive chunks of up to <paramref name="chunksize"/> items.
/// </summary>
/// <param name="source">The sequence to cut up.</param>
/// <param name="chunksize">The maximum number of items per chunk.</param>
/// <returns>One lazy Take view per chunk, each layered on a growing chain of Skip calls.</returns>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source, int chunksize)
{
    // "rest" is whatever has not been handed out yet; each round skips one more chunk.
    for (IEnumerable<T> rest = source; rest.Any(); rest = rest.Skip(chunksize))
    {
        yield return rest.Take(chunksize);
    }
}
In general the approach suggested by CaseyB works fine, in fact if you are passing in a List<T> it is hard to fault it, perhaps I would change it to:
/// <summary>
/// Lazily cuts a sequence into consecutive chunks of up to <paramref name="chunksize"/> items,
/// skipping from the original source each time instead of chaining Skip calls.
/// </summary>
/// <param name="source">The sequence to cut up; it is enumerated again for every chunk.</param>
/// <param name="chunksize">The maximum number of items per chunk.</param>
/// <returns>One lazy Skip/Take view per chunk.</returns>
public static IEnumerable<IEnumerable<T>> ChunkTrivialBetter<T>(this IEnumerable<T> source, int chunksize)
{
    // offset is the index of the first item of the chunk about to be produced.
    for (int offset = 0; source.Skip(offset).Any(); offset += chunksize)
    {
        yield return source.Skip(offset).Take(chunksize);
    }
}
Which will avoid massive call chains. Nonetheless, this approach has a general flaw. It materializes two enumerations per chunk, to highlight the issue try running:
// Asks for the 100001st chunk of 8 from a huge range and prints its items.
// NOTE(review): with a Skip/Take based Chunk every chunk restarts from the beginning of
// the source, which is presumably why this never finishes in practice.
foreach (var item in Enumerable.Range(1, int.MaxValue).Chunk(8).Skip(100000).First())
{
Console.WriteLine(item);
}
// wait forever
To overcome this we can try Cameron's approach, which passes the above test in flying colors as it only walks the enumeration once.
The trouble is that it has a different flaw: it materializes every item in each chunk, so memory usage can become very high.
To illustrate that try running:
// Every source item is turned into a string of more than 100000 characters, grouped
// 10000 at a time, and the 101st group is enumerated, printing one dot per item.
// NOTE(review): a chunker that buffers whole groups must hold 10000 of these strings at once.
foreach (var item in Enumerable.Range(1, int.MaxValue)
.Select(x => x + new string('x', 100000))
.Clump(10000).Skip(100).First())
{
Console.Write('.');
}
// OutOfMemoryException
Finally, any implementation should be able to handle out of order iteration of chunks, for example:
Enumerable.Range(1,3).Chunk(2).Reverse().ToArray()
// should return [3],[1,2]
Many highly optimal solutions like my first revision of this answer failed there. The same issue can be seen in casperOne's optimized answer.
To address all these issues you can use the following:
namespace ChunkedEnumerator
{
// Lazy Chunk extension built from three cooperating pieces: ChunkedEnumerable (one chunk),
// its ChildEnumerator, and an EnumeratorWrapper shared by all chunks of one Chunk call.
public static class Extensions
{
// One chunk: a window of up to chunkSize items starting at absolute index "start" of the source.
class ChunkedEnumerable<T> : IEnumerable<T>
{
// Enumerator over a single chunk; every item is fetched by absolute position via the wrapper.
class ChildEnumerator : IEnumerator<T>
{
ChunkedEnumerable<T> parent;
// Offset inside the chunk; -1 until MoveNext has been called.
int position;
// Set when the chunk is full, the source has run out, or Dispose was called.
bool done = false;
T current;
public ChildEnumerator(ChunkedEnumerable<T> parent)
{
this.parent = parent;
position = -1;
// Register as a user of the shared wrapper; Dispose releases this reference.
parent.wrapper.AddRef();
}
// The most recently fetched item; throws before the first MoveNext and once done is set.
public T Current
{
get
{
if (position == -1 || done)
{
throw new InvalidOperationException();
}
return current;
}
}
// Releases this enumerator's wrapper reference at most once; "done" doubles as the guard.
public void Dispose()
{
if (!done)
{
done = true;
parent.wrapper.RemoveRef();
}
}
object System.Collections.IEnumerator.Current
{
get { return Current; }
}
// Steps to the next offset; stops after chunkSize items or when the wrapper reports
// that the requested absolute position does not exist.
public bool MoveNext()
{
position++;
if (position + 1 > parent.chunkSize)
{
done = true;
}
if (!done)
{
done = !parent.wrapper.Get(position + parent.start, out current);
}
return !done;
}
public void Reset()
{
// per http://msdn.microsoft.com/en-us/library/system.collections.ienumerator.reset.aspx
throw new NotSupportedException();
}
}
EnumeratorWrapper<T> wrapper;
int chunkSize;
int start;
public ChunkedEnumerable(EnumeratorWrapper<T> wrapper, int chunkSize, int start)
{
this.wrapper = wrapper;
this.chunkSize = chunkSize;
this.start = start;
}
// Each call creates a fresh ChildEnumerator, which adds a wrapper reference.
public IEnumerator<T> GetEnumerator()
{
return new ChildEnumerator(this);
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
// Gives positional access to a forward-only source by keeping one live enumeration and
// restarting it whenever an earlier position is requested.
class EnumeratorWrapper<T>
{
public EnumeratorWrapper (IEnumerable<T> source)
{
SourceEumerable = source;
}
IEnumerable<T> SourceEumerable {get; set;}
// The enumeration currently in progress, or null when none is open.
Enumeration currentEnumeration;
// State of one pass over the source: its enumerator, the index it is on, and whether it ended.
class Enumeration
{
public IEnumerator<T> Source { get; set; }
public int Position { get; set; }
public bool AtEnd { get; set; }
}
// Fetches the item at absolute index pos. Returns false (with item left at default)
// when the source has fewer than pos + 1 items.
public bool Get(int pos, out T item)
{
// The live enumeration is already past pos: discard it so a new one starts from the beginning.
if (currentEnumeration != null && currentEnumeration.Position > pos)
{
currentEnumeration.Source.Dispose();
currentEnumeration = null;
}
if (currentEnumeration == null)
{
currentEnumeration = new Enumeration { Position = -1, Source = SourceEumerable.GetEnumerator(), AtEnd = false };
}
item = default(T);
if (currentEnumeration.AtEnd)
{
return false;
}
// Advance one item at a time until the requested index is reached or the source ends.
while(currentEnumeration.Position < pos)
{
currentEnumeration.AtEnd = !currentEnumeration.Source.MoveNext();
currentEnumeration.Position++;
if (currentEnumeration.AtEnd)
{
return false;
}
}
item = currentEnumeration.Source.Current;
return true;
}
// Number of outstanding AddRef calls (the outer iterator plus every undisposed chunk enumerator).
int refs = 0;
// needed for dispose semantics
public void AddRef()
{
refs++;
}
// Drops one reference; when the count reaches zero the open source enumerator is disposed.
public void RemoveRef()
{
refs--;
if (refs == 0 && currentEnumeration != null)
{
var copy = currentEnumeration;
currentEnumeration = null;
copy.Source.Dispose();
}
}
}
// Splits source into lazily evaluated chunks of up to chunksize items.
// Throws InvalidOperationException for chunksize < 1; because this is an iterator method,
// the check runs when enumeration starts rather than at the call.
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source, int chunksize)
{
if (chunksize < 1) throw new InvalidOperationException();
var wrapper = new EnumeratorWrapper<T>(source);
int currentPos = 0;
T ignore;
try
{
// The outer iterator holds its own reference for as long as it is being enumerated.
wrapper.AddRef();
// Probe the first index of the next chunk; the fetched value itself is not needed here.
while (wrapper.Get(currentPos, out ignore))
{
yield return new ChunkedEnumerable<T>(wrapper, chunksize, currentPos);
currentPos += chunksize;
}
}
finally
{
wrapper.RemoveRef();
}
}
}
// Console demo for Chunk: in-order iteration over a long range, then out-of-order
// iteration over chunks that were first collected into an array.
class Program
{
    static void Main(string[] args)
    {
        // Print the first eleven groups of three, starting after ten million skipped items.
        int remaining = 10;
        foreach (var group in Enumerable.Range(1, int.MaxValue).Skip(10000000).Chunk(3))
        {
            foreach (var n in group)
            {
                Console.Write(n);
                Console.Write(" ");
            }

            Console.WriteLine();

            if (remaining == 0)
            {
                break;
            }

            remaining--;
        }

        // Collect the chunks first, then visit them in reverse order.
        var stuffs = Enumerable.Range(1, 10).Chunk(2).ToArray();
        int[] visitOrder = { 3, 2, 1 };
        foreach (var idx in visitOrder)
        {
            Console.Write($"idx {idx} ");
            foreach (var n in stuffs[idx])
            {
                Console.Write(n);
                Console.Write(" ");
            }

            Console.WriteLine();
        }

        /*
        10000001 10000002 10000003
        10000004 10000005 10000006
        10000007 10000008 10000009
        10000010 10000011 10000012
        10000013 10000014 10000015
        10000016 10000017 10000018
        10000019 10000020 10000021
        10000022 10000023 10000024
        10000025 10000026 10000027
        10000028 10000029 10000030
        10000031 10000032 10000033
        idx 3 7 8
        idx 2 5 6
        idx 1 3 4
        */
        Console.ReadKey();
    }
}
}
There is also a round of optimisations you could introduce for out-of-order iteration of chunks, which is out of scope here.
As to which method you should choose? It totally depends on the problem you are trying to solve. If you are not concerned with the first flaw the simple answer is incredibly appealing.
Note that, as with most of these methods, this is not safe for multithreading; things can get weird under concurrent use. If you need it to be thread safe, you would have to amend EnumeratorWrapper.
You could use a number of queries that use Take and Skip, but that would add too many iterations on the original list, I believe.
Rather, I think you should create an iterator of your own, like so:
/// <summary>
/// Walks <paramref name="enumerable"/> once and yields its items in lists of
/// <paramref name="groupSize"/>; a final shorter list carries any remainder.
/// </summary>
/// <param name="enumerable">The sequence to group.</param>
/// <param name="groupSize">The number of items per full group.</param>
/// <returns>A lazily produced sequence of lists.</returns>
public static IEnumerable<IEnumerable<T>> GetEnumerableOfEnumerables<T>(
    IEnumerable<T> enumerable, int groupSize)
{
    // The group currently being filled.
    var pending = new List<T>(groupSize);

    using (IEnumerator<T> cursor = enumerable.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            pending.Add(cursor.Current);
            if (pending.Count != groupSize)
            {
                continue;
            }

            // Group is full: hand it out and start a fresh one.
            yield return pending;
            pending = new List<T>(groupSize);
        }
    }

    // Whatever is left forms the last, shorter group.
    if (pending.Count > 0)
    {
        yield return pending;
    }
}
You can then call this and it is LINQ enabled so you can perform other operations on the resulting sequences.
In light of Sam's answer, I felt there was an easier way to do this without:
Iterating through the list again (which I didn't do originally)
Materializing the items in groups before releasing the chunk (for large chunks of items, there would be memory issues)
All of the code that Sam posted
That said, here's another pass, which I've codified in an extension method to IEnumerable<T> called Chunk:
/// <summary>
/// Validates the arguments immediately and then defers to the lazy ChunkInternal iterator.
/// </summary>
/// <param name="source">The sequence to split.</param>
/// <param name="chunkSize">The maximum number of items per chunk; must be positive.</param>
/// <returns>A lazy sequence of chunks.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source,
    int chunkSize)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(chunkSize),
            "The chunkSize parameter must be a positive value.");
    }

    // Checks live here, outside the iterator, so they run at call time.
    return ChunkInternal(source, chunkSize);
}
Nothing surprising up there, just basic error checking.
Moving on to ChunkInternal:
/// <summary>
/// Iterator behind Chunk: opens one enumerator on <paramref name="source"/> and yields a
/// ChunkSequence view for every chunk that has at least one item.
/// </summary>
/// <param name="source">The sequence to split; already validated by the caller.</param>
/// <param name="chunkSize">The maximum number of items per chunk; already validated.</param>
/// <returns>A lazy sequence of chunk views that all read from the same enumerator.</returns>
private static IEnumerable<IEnumerable<T>> ChunkInternal<T>(
    this IEnumerable<T> source, int chunkSize)
{
    Debug.Assert(source != null);
    Debug.Assert(chunkSize > 0);

    // One enumerator is shared by every chunk and disposed when iteration ends.
    using (IEnumerator<T> enumerator = source.GetEnumerator())
    {
        // MoveNext positions the enumerator on the first item of the next chunk;
        // a false result means the source is exhausted.
        while (enumerator.MoveNext())
        {
            yield return ChunkSequence(enumerator, chunkSize);
        }
    }
}
Basically, it gets the IEnumerator<T> and manually iterates through each item. It checks to see if there any items currently to be enumerated. After each chunk is enumerated through, if there aren't any items left, it breaks out.
Once it detects there are items in the sequence, it delegates the responsibility for the inner IEnumerable<T> implementation to ChunkSequence:
/// <summary>
/// Yields up to <paramref name="chunkSize"/> items from <paramref name="enumerator"/>,
/// starting with the item it is currently positioned on.
/// </summary>
/// <param name="enumerator">An enumerator on which MoveNext has already succeeded.</param>
/// <param name="chunkSize">The maximum number of items to yield.</param>
/// <returns>The items of one chunk.</returns>
private static IEnumerable<T> ChunkSequence<T>(IEnumerator<T> enumerator,
    int chunkSize)
{
    Debug.Assert(enumerator != null);
    Debug.Assert(chunkSize > 0);

    // "yielded" counts the items handed out, including the one about to be yielded.
    for (int yielded = 1; ; yielded++)
    {
        yield return enumerator.Current;

        // Stop when the chunk is full (without advancing) or when the source runs dry.
        if (yielded >= chunkSize || !enumerator.MoveNext())
        {
            yield break;
        }
    }
}
Since MoveNext was already called on the IEnumerator<T> passed to ChunkSequence, it yields the item returned by Current and then increments the count, making sure never to return more than chunkSize items and moving to the next item in the sequence after every iteration (but short-circuited if the number of items yielded exceeds the chunk size).
If there are no items left, then the InternalChunk method will make another pass in the outer loop, but when MoveNext is called a second time, it will still return false, as per the documentation (emphasis mine):
If MoveNext passes the end of the collection, the enumerator is
positioned after the last element in the collection and MoveNext
returns false. When the enumerator is at this position, subsequent
calls to MoveNext also return false until Reset is called.
At this point, the loop will break, and the sequence of sequences will terminate.
This is a simple test:
// Splits a sample string into groups of three characters and prints one line per group.
static void Main()
{
    string letters = "agewpsqfxyimc";
    int groupNumber = 0;

    foreach (IEnumerable<char> group in letters.Chunk(3))
    {
        // Line header with the 1-based group number.
        groupNumber++;
        Console.Write("Group: {0} - ", groupNumber);

        // Every character is followed by a comma and a space, including the last one.
        foreach (char letter in group)
        {
            Console.Write(letter + ", ");
        }

        Console.WriteLine();
    }
}
Output:
Group: 1 - a, g, e,
Group: 2 - w, p, s,
Group: 3 - q, f, x,
Group: 4 - y, i, m,
Group: 5 - c,
An important note, this will not work if you don't drain the entire child sequence or break at any point in the parent sequence. This is an important caveat, but if your use case is that you will consume every element of the sequence of sequences, then this will work for you.
Additionally, it will do strange things if you play with the order, just as Sam's did at one point.
Ok, here's my take on it:
completely lazy: works on infinite enumerables
no intermediate copying/buffering
O(n) execution time
works also when inner sequences are only partially consumed
/// <summary>
/// Lazily splits <paramref name="enumerable"/> into chunks of up to
/// <paramref name="chunkSize"/> items without buffering. Items a consumer leaves unread
/// in a chunk are skipped before the next chunk is produced.
/// </summary>
/// <param name="enumerable">The sequence to split.</param>
/// <param name="chunkSize">The maximum number of items per chunk; must be at least 1.</param>
/// <returns>A lazy sequence of chunk views that share one enumerator.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="chunkSize"/> is less than 1 (raised when enumeration starts).
/// </exception>
public static IEnumerable<IEnumerable<T>> Chunks<T>(this IEnumerable<T> enumerable,
    int chunkSize)
{
    if (chunkSize < 1)
    {
        throw new ArgumentException("chunkSize must be positive");
    }

    using var source = enumerable.GetEnumerator();
    while (source.MoveNext())
    {
        // Items still owed to the current chunk, counting the one the enumerator is on.
        int left = chunkSize;

        // Shared by the chunk view and the drain loop below, so both see the same countdown.
        bool Advance() => --left > 0 && source.MoveNext();

        yield return source.GetChunk(Advance);

        // Discard whatever the consumer did not read from this chunk.
        while (Advance())
        {
        }
    }
}
/// <summary>
/// Yields the enumerator's current item, then keeps yielding for as long as
/// <paramref name="innerMoveNext"/> reports that another item of this chunk is available.
/// </summary>
/// <param name="e">An enumerator already positioned on the chunk's first item.</param>
/// <param name="innerMoveNext">Advances <paramref name="e"/> within the chunk; false ends it.</param>
/// <returns>The items of one chunk.</returns>
private static IEnumerable<T> GetChunk<T>(this IEnumerator<T> e,
    Func<bool> innerMoveNext)
{
    // The first item is always present because the caller has already advanced onto it.
    yield return e.Current;

    while (innerMoveNext())
    {
        yield return e.Current;
    }
}
Example Usage
var src = new [] {1, 2, 3, 4, 5, 6};
var c3 = src.Chunks(3); // {{1, 2, 3}, {4, 5, 6}};
var c4 = src.Chunks(4); // {{1, 2, 3, 4}, {5, 6}};
var sum = c3.Select(c => c.Sum()); // {6, 15}
var count = c3.Count(); // 2
var take2 = c3.Select(c => c.Take(2)); // {{1, 2}, {4, 5}}
Explanations
The code works by nesting two yield based iterators.
The outer iterator must keep track of how many elements have been effectively consumed by the inner (chunk) iterator. This is done by closing over remaining with innerMoveNext(). Unconsumed elements of a chunk are discarded before the next chunk is yielded by the outer iterator.
This is necessary because otherwise you get inconsistent results, when the inner enumerables are not (completely) consumed (e.g. c3.Count() would return 6).
Note: The answer has been updated to address the shortcomings pointed out by #aolszowka.
Update .NET 6.0
.NET 6.0 added a new native Chunk method to the System.Linq namespace:
public static System.Collections.Generic.IEnumerable<TSource[]> Chunk<TSource> (
this System.Collections.Generic.IEnumerable<TSource> source, int size);
Using this new method every chunk except the last will be of size size. The last chunk will contain the remaining elements and may be of a smaller size.
Here is an example:
var list = Enumerable.Range(1, 100);
var chunkSize = 10;
foreach(var chunk in list.Chunk(chunkSize)) //Returns a chunk with the correct size.
{
Parallel.ForEach(chunk, (item) =>
{
//Do something Parallel here.
Console.WriteLine(item);
});
}
You’re probably thinking, well why not use Skip and Take? Which is true, I think this is just a bit more concise and makes things just that little bit more readable.
completely lazy, no counting or copying:
/// <summary>
/// Lazy splitting of a sequence into consecutive pieces that read straight from one shared
/// enumerator. Pieces must be consumed in order, each one fully, before moving on.
/// </summary>
public static class EnumerableExtensions
{
    /// <summary>
    /// Splits <paramref name="source"/> into consecutive pieces of up to <paramref name="len"/> items.
    /// </summary>
    /// <param name="source">The sequence to split.</param>
    /// <param name="len">The number of items per piece.</param>
    /// <returns>A lazy sequence of lazy pieces.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="len"/> is 0 (exception type kept for existing callers; raised when
    /// enumeration starts).
    /// </exception>
    public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> source, int len)
    {
        if (len == 0)
            throw new ArgumentNullException();

        // Dispose the source enumerator when the outer iteration finishes or is abandoned;
        // previously it was never disposed. A piece that is enumerated after that point
        // only yields its first item.
        using (var enumer = source.GetEnumerator())
        {
            while (enumer.MoveNext())
            {
                yield return Take(enumer.Current, enumer, len);
            }
        }
    }

    /// <summary>
    /// Yields <paramref name="head"/> followed by up to <paramref name="len"/> - 1 further
    /// items pulled from <paramref name="tail"/>.
    /// </summary>
    private static IEnumerable<T> Take<T>(T head, IEnumerator<T> tail, int len)
    {
        while (true)
        {
            yield return head;

            // Piece is full: stop without advancing so the next piece starts on a fresh item.
            if (--len == 0)
                break;

            if (tail.MoveNext())
                head = tail.Current;
            else
                break;
        }
    }
}
I think the following suggestion would be the fastest. I am sacrificing the laziness of the source Enumerable for the ability to use Array.Copy and to know the length of each of my sublists ahead of time.
/// <summary>
/// Splits <paramref name="items"/> into arrays of <paramref name="size"/> elements; the
/// last array holds the remainder. The source is materialised into one array when
/// enumeration starts, unless it already is an array.
/// </summary>
/// <param name="items">The items to split.</param>
/// <param name="size">The number of elements per chunk; must be positive.</param>
/// <returns>The chunks, produced lazily.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is zero or negative.</exception>
public static IEnumerable<T[]> Chunk<T>(this IEnumerable<T> items, int size)
{
    if (items is null)
    {
        throw new ArgumentNullException(nameof(items));
    }

    // A size of 0 used to loop forever yielding empty arrays; a negative size failed
    // later with an unrelated exception. Reject both up front.
    if (size <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(size), size, "Chunk size must be greater than zero.");
    }

    return Slice();

    IEnumerable<T[]> Slice()
    {
        T[] array = items as T[] ?? items.ToArray();
        int start = 0;
        while (start < array.Length)
        {
            int length = Math.Min(size, array.Length - start);
            T[] chunk = new T[length];
            Array.Copy(array, start, chunk, 0, length);
            yield return chunk;

            // Advancing by the copied length keeps the index within the array bounds,
            // so it cannot overflow even for a very large size.
            start += length;
        }
    }
}
For anyone interested in a packaged/maintained solution, the MoreLINQ library provides the Batch extension method which matches your requested behavior:
IEnumerable<char> source = "Example string";
IEnumerable<IEnumerable<char>> chunksOfThreeChars = source.Batch(3);
The Batch implementation is similar to Cameron MacFarland's answer, with the addition of an overload for transforming the chunk/batch before returning, and performs quite well.
I wrote a Clump extension method several years ago. Works great, and is the fastest implementation here. :P
/// <summary>
/// Groups the items of a sequence into lots of a fixed maximum size.
/// </summary>
/// <typeparam name="T">The element type.</typeparam>
/// <param name="source">The items to group.</param>
/// <param name="size">The largest number of items a lot may hold; must be at least 1.</param>
/// <returns>A lazy sequence of lots, none larger than <paramref name="size"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
public static IEnumerable<IEnumerable<T>> Clump<T>(this IEnumerable<T> source, int size)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    bool sizeIsUsable = size >= 1;
    if (!sizeIsUsable)
    {
        throw new ArgumentOutOfRangeException("size", "size must be greater than 0");
    }

    // Arguments are checked here, eagerly; the iterator does the actual work lazily.
    return ClumpIterator(source, size);
}
/// <summary>
/// Iterator behind Clump: buffers items into arrays of <paramref name="size"/> and yields
/// each array once it is full, followed by a trimmed array for any remainder.
/// </summary>
/// <param name="source">The items to group; validated by the caller.</param>
/// <param name="size">The number of items per full lot; validated by the caller.</param>
/// <returns>The lots, each backed by its own array.</returns>
private static IEnumerable<IEnumerable<T>> ClumpIterator<T>(IEnumerable<T> source, int size)
{
    Debug.Assert(source != null, "source is null.");

    T[] items = new T[size];
    int count = 0;
    foreach (var item in source)
    {
        items[count] = item;
        count++;

        if (count == size)
        {
            // Full lot: hand it out and start a new buffer so the caller owns the old one.
            yield return items;
            items = new T[size];
            count = 0;
        }
    }

    // count is reset to 0 whenever it reaches size, so here it is always below size.
    // The former "count == size" branch could never run and has been removed.
    if (count > 0)
    {
        // Trim the partly filled buffer so the last lot has no trailing default values.
        T[] tempItems = new T[count];
        Array.Copy(items, tempItems, count);
        yield return tempItems;
    }
}
We can improve #JaredPar's solution to do true lazy evaluation. We use a GroupAdjacentBy method that yields groups of consecutive elements with the same key:
sequence
.Select((x, i) => new { Value = x, Index = i })
.GroupAdjacentBy(x=>x.Index/3)
.Select(g=>g.Select(x=>x.Value))
Because the groups are yielded one-by-one, this solution works efficiently with long or infinite sequences.
System.Interactive provides Buffer() for this purpose. Some quick testing shows performance is similar to Sam's solution.
I find this little snippet does the job quite nicely.
/// <summary>
/// Lazily yields consecutive slices of <paramref name="source"/>, each a new list of up to
/// <paramref name="chunkSize"/> items obtained with GetRange.
/// </summary>
/// <param name="source">The list to slice.</param>
/// <param name="chunkSize">The maximum number of items per slice.</param>
/// <returns>The slices in list order.</returns>
public static IEnumerable<List<T>> Chunked<T>(this List<T> source, int chunkSize)
{
    for (int start = 0; start < source.Count; start += chunkSize)
    {
        // The final slice may be shorter than chunkSize.
        int length = Math.Min(source.Count - start, chunkSize);
        yield return source.GetRange(start, length);
    }
}
Here's a list splitting routine I wrote a couple months ago:
/// <summary>
/// Splits <paramref name="theList"/> into lists of up to <paramref name="chunkSize"/>
/// consecutive items by grouping on each item's position divided by the chunk size.
/// </summary>
/// <param name="theList">The list to split.</param>
/// <param name="chunkSize">The maximum number of items per chunk.</param>
/// <returns>The chunks in source order.</returns>
public static List<List<T>> Chunk<T>(
    List<T> theList,
    int chunkSize
)
{
    return theList
        // Tag every item with the number of the chunk it belongs to.
        .Select((item, position) => (Bucket: position / chunkSize, Item: item))
        .GroupBy(tagged => tagged.Bucket, tagged => tagged.Item)
        .Select(bucket => bucket.ToList())
        .ToList();
}
We found David B's solution worked the best. But we adapted it to a more general solution:
list.GroupBy(item => item.SomeProperty)
.Select(group => new List<T>(group))
.ToArray();
What about this one?
var input = new List<string> { "a", "g", "e", "w", "p", "s", "q", "f", "x", "y", "i", "m", "c" };
var k = 3;
// (Count - 1) / k + 1 is the number of chunks, rounding up, for a non-empty list.
var res = Enumerable.Range(0, (input.Count - 1) / k + 1)
    // The last chunk takes only what is left after i full chunks.
    .Select(i => input.GetRange(i * k, Math.Min(k, input.Count - i * k)))
    .ToList();
As far as I know, GetRange() is linear in terms of number of items taken. So this should perform well.
This is an old question but this is what I ended up with; it enumerates the enumerable only once, but does create lists for each of the partitions. It doesn't suffer from unexpected behavior when ToArray() is called as some of the implementations do:
/// <summary>
/// Enumerates <paramref name="source"/> once and yields its items in freshly created lists
/// of <paramref name="chunkSize"/>; a final shorter list carries any remainder.
/// </summary>
/// <param name="source">The sequence to partition.</param>
/// <param name="chunkSize">The number of items per full partition; must be at least 1.</param>
/// <returns>A lazy sequence of lists.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="chunkSize"/> is less than 1.</exception>
/// <remarks>This is an iterator, so both checks run when enumeration starts.</remarks>
public static IEnumerable<IEnumerable<T>> Partition<T>(IEnumerable<T> source, int chunkSize)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    if (chunkSize < 1)
    {
        throw new ArgumentException("Invalid chunkSize: " + chunkSize);
    }

    var bucket = new List<T>();
    foreach (T element in source)
    {
        bucket.Add(element);
        if (bucket.Count != chunkSize)
        {
            continue;
        }

        // Hand out the full bucket and continue with a new one.
        yield return bucket;
        bucket = new List<T>();
    }

    if (bucket.Count > 0)
    {
        yield return bucket;
    }
}
Old code, but this is what I've been using:
/// <summary>
/// Yields the items of <paramref name="source"/> in lists of <paramref name="max"/> items;
/// a final shorter list carries any remainder.
/// </summary>
/// <param name="source">The sequence to split.</param>
/// <param name="max">The number of items per full set; also used as each list's capacity.</param>
/// <returns>A lazy sequence of lists.</returns>
public static IEnumerable<List<T>> InSetsOf<T>(this IEnumerable<T> source, int max)
{
    var current = new List<T>(max);

    using (IEnumerator<T> cursor = source.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            current.Add(cursor.Current);
            if (current.Count == max)
            {
                // Set is complete: emit it and begin the next one.
                yield return current;
                current = new List<T>(max);
            }
        }
    }

    if (current.Count != 0)
    {
        yield return current;
    }
}
This following solution is the most compact I could come up with that is O(n).
/// <summary>
/// Yields arrays of up to <paramref name="chunksize"/> consecutive items. The source is
/// used directly when it is already an IList, otherwise it is copied into a list first.
/// </summary>
/// <param name="source">The items to split.</param>
/// <param name="chunksize">The maximum number of items per array.</param>
/// <returns>The chunks, produced lazily.</returns>
public static IEnumerable<T[]> Chunk<T>(IEnumerable<T> source, int chunksize)
{
    IList<T> items = source as IList<T> ?? source.ToList();

    int start = 0;
    while (start < items.Count)
    {
        // The final chunk may be shorter than chunksize.
        int length = Math.Min(chunksize, items.Count - start);
        var chunk = new T[length];
        for (int offset = 0; offset < length; offset++)
        {
            chunk[offset] = items[start + offset];
        }

        yield return chunk;
        start += chunksize;
    }
}
If the list is of type system.collections.generic you can use the "CopyTo" method available to copy elements of your array to other sub arrays. You specify the start element and number of elements to copy.
You could also make 3 clones of your original list and use the "RemoveRange" on each list to shrink the list to the size you want.
Or just create a helper method to do it for you.
It's an old solution but I had a different approach. I use Skip to move to desired offset and Take to extract desired number of elements:
/// <summary>
/// Splits <paramref name="source"/> into Skip/Take views of up to
/// <paramref name="chunkSize"/> items. The source is counted once up front and enumerated
/// again for every chunk that is read.
/// </summary>
/// <param name="source">The sequence to split; it must be safe to enumerate repeatedly.</param>
/// <param name="chunkSize">The maximum number of items per chunk; must be positive.</param>
/// <returns>A lazy sequence of lazy chunk views.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source,
    int chunkSize)
{
    if (chunkSize <= 0)
    {
        // The single-string constructor treats its argument as the parameter name, so the
        // name and the message have to be passed separately.
        throw new ArgumentOutOfRangeException(nameof(chunkSize), $"{nameof(chunkSize)} should be > 0");
    }

    // Number of chunks, rounded up, using integer arithmetic only.
    int total = source.Count();
    int nbChunks = total == 0 ? 0 : (total - 1) / chunkSize + 1;

    return Enumerable.Range(0, nbChunks)
                     .Select(chunkNb => source.Skip(chunkNb * chunkSize)
                                              .Take(chunkSize));
}
Another way is using Rx Buffer operator
//using System.Linq;
//using System.Reactive.Linq;
//using System.Reactive.Threading.Tasks;
var observableBatches = anAnumerable.ToObservable().Buffer(size);
var batches = aList.ToObservable().Buffer(size).ToList().ToTask().GetAwaiter().GetResult();
The question was how to "Split List into Sublists with LINQ", but sometimes you may want those sub-lists to be references to the original list, not copies. This allows you to modify the original list from the sub-lists. In that case, this may work for you.
/// <summary>
/// Yields consecutive <see cref="Memory{T}"/> windows over <paramref name="array"/>, each
/// covering up to <paramref name="size"/> elements. The windows reference the original
/// array, so writing through them changes it.
/// </summary>
/// <param name="array">The array to slice.</param>
/// <param name="size">The maximum number of elements per window; must be positive.</param>
/// <returns>The windows in array order; nothing for an empty array.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="size"/> is less than 1.</exception>
/// <remarks>This is an iterator, so both checks run when enumeration starts.</remarks>
public static IEnumerable<Memory<T>> RefChunkBy<T>(this T[] array, int size)
{
    // A null array used to be reported as "chunkSize must be positive". ArgumentNullException
    // derives from ArgumentException, so existing catch blocks still apply.
    if (array is null)
    {
        throw new ArgumentNullException(nameof(array));
    }

    if (size < 1)
    {
        throw new ArgumentException("chunkSize must be positive");
    }

    int index = 0;
    while (index < array.Length)
    {
        // The last window covers whatever is left.
        int length = Math.Min(size, array.Length - index);
        yield return new Memory<T>(array, index, length);
        index += length;
    }
}
Usage:
var src = new[] { 1, 2, 3, 4, 5, 6 };
var c3 = RefChunkBy(src, 3); // {{1, 2, 3}, {4, 5, 6}};
var c4 = RefChunkBy(src, 4); // {{1, 2, 3, 4}, {5, 6}};
// as extension method
var c3 = src.RefChunkBy(3); // {{1, 2, 3}, {4, 5, 6}};
var c4 = src.RefChunkBy(4); // {{1, 2, 3, 4}, {5, 6}};
var sum = c3.Select(c => c.Span.ToArray().Sum()); // {6, 15}
var count = c3.Count(); // 2
var take2 = c3.Select(c => c.Span.ToArray().Take(2)); // {{1, 2}, {4, 5}}
Feel free to make this code better.
Using modular partitioning:
/// <summary>
/// Distributes the strings of <paramref name="input"/> over ceil(count / chunkSize) buckets
/// according to each string's hash code. Every string lands in exactly one bucket, but
/// bucket sizes depend on the hash values and are not guaranteed to equal
/// <paramref name="chunkSize"/>.
/// </summary>
/// <param name="input">The strings to distribute; enumerated once for counting and once per bucket.</param>
/// <param name="chunkSize">The target bucket size used to decide how many buckets to create.</param>
/// <returns>One lazily filtered view of <paramref name="input"/> per bucket.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
public IEnumerable<IEnumerable<string>> Split(IEnumerable<string> input, int chunkSize)
{
    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
    }

    var chunks = (int)Math.Ceiling((double)input.Count() / (double)chunkSize);

    // GetHashCode can be negative, and a negative remainder never matches a bucket id,
    // which used to drop those strings. Reinterpreting the hash as unsigned keeps the
    // remainder in 0..chunks-1 and leaves non-negative hashes in the bucket they had before.
    return Enumerable.Range(0, chunks)
        .Select(id => input.Where(s => (int)((uint)s.GetHashCode() % (uint)chunks) == id));
}
Just putting in my two cents. If you wanted to "bucket" the list (visualize left to right), you could do the following:
/// <summary>
/// Deals the items of <paramref name="source"/> round-robin into
/// <paramref name="numberOfBuckets"/> lists: item i goes to bucket i % numberOfBuckets.
/// </summary>
/// <param name="source">The items to distribute.</param>
/// <param name="numberOfBuckets">How many buckets to create.</param>
/// <returns>The buckets; trailing buckets stay empty when there are fewer items than buckets.</returns>
public static List<List<T>> Buckets<T>(this List<T> source, int numberOfBuckets)
{
    // Create every bucket up front so empty ones are still part of the result.
    var buckets = new List<List<T>>();
    for (int created = 0; created < numberOfBuckets; created++)
    {
        buckets.Add(new List<T>());
    }

    for (int position = 0; position < source.Count(); position++)
    {
        List<T> target = buckets[position % numberOfBuckets];
        target.Add(source[position]);
    }

    return buckets;
}
/// <summary>
/// Splits <paramref name="originalItemsList"/> into consecutive lists of up to
/// <paramref name="number"/> items; the last list holds the remainder.
/// </summary>
/// <param name="originalItemsList">The list to split.</param>
/// <param name="number">The maximum number of items per list; must be positive.</param>
/// <returns>The sub-lists in order; empty when the input is empty.</returns>
/// <exception cref="ArgumentNullException"><paramref name="originalItemsList"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="number"/> is zero or negative.</exception>
public static List<List<T>> GetSplitItemsList<T>(List<T> originalItemsList, short number)
{
    if (originalItemsList is null)
    {
        throw new ArgumentNullException(nameof(originalItemsList));
    }

    // With a step of zero or less the loop below would never reach the end of the list.
    if (number <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(number), number, "Group size must be greater than zero.");
    }

    var listGroup = new List<List<T>>();
    for (int start = 0; start < originalItemsList.Count; start += number)
    {
        // GetRange copies just this slice; Take(j).Skip(i) walked the list from the
        // beginning for every group.
        int length = Math.Min(number, originalItemsList.Count - start);
        listGroup.Add(originalItemsList.GetRange(start, length));
    }

    return listGroup;
}
To insert my two cents...
By using the list type for the source to be chunked, I found another very compact solution:
/// <summary>
/// Copies <paramref name="source"/> into a list when enumeration starts and yields
/// consecutive slices of up to <paramref name="chunkSize"/> items. An empty source yields
/// no chunks.
/// </summary>
/// <param name="source">The items to split.</param>
/// <param name="chunkSize">The maximum number of items per chunk; must be positive.</param>
/// <returns>The chunks, each an independent list.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
public static IEnumerable<IEnumerable<TSource>> Chunk<TSource>(this IEnumerable<TSource> source, int chunkSize)
{
    if (source is null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    // A chunk size of zero or less used to keep the slicing loop from ever finishing.
    if (chunkSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
    }

    return Slice();

    IEnumerable<IEnumerable<TSource>> Slice()
    {
        // copy the source into a list
        var chunkList = source.ToList();

        // Walk the copy with an index instead of removing from its front, which shifted
        // all remaining items on every round. Nothing is yielded for an empty source.
        int start = 0;
        while (start < chunkList.Count)
        {
            int length = Math.Min(chunkSize, chunkList.Count - start);
            yield return chunkList.GetRange(start, length);
            start += length;
        }
    }
}
I took the primary answer and made it to be an IOC container to determine where to split. (For who is really looking to only split on 3 items, in reading this post while searching for an answer?)
This method allows one to split on any type of item as needed.
/// <summary>
/// Splits <paramref name="main"/> into consecutive groups, starting a new group at every
/// item for which <paramref name="splitOn"/> returns true.
/// </summary>
/// <param name="main">The items to split.</param>
/// <param name="splitOn">Called once per item, in order; true marks the first item of a new group.</param>
/// <returns>The groups in order of first appearance.</returns>
public static List<List<T>> SplitOn<T>(List<T> main, Func<T, bool> splitOn)
{
    // Running group number; items before the first split point share group 0.
    int groupIndex = 0;

    return main
        .Select(item =>
        {
            if (splitOn(item))
            {
                groupIndex++;
            }

            return new { Group = groupIndex, Value = item };
        })
        .GroupBy(tagged => tagged.Group, tagged => tagged.Value)
        .Select(members => members.ToList())
        .ToList();
}
So for the OP the code would be
var it = new List<string>()
{ "a", "g", "e", "w", "p", "s", "q", "f", "x", "y", "i", "m", "c" };
// The predicate ignores the item itself: it counts calls through the captured index and
// answers true on calls 0, 3, 6, ... so a new group starts at every third item.
int index = 0;
var result = SplitOn(it, (itm) => (index++ % 3) == 0 );
This performs as well as Sam Saffron's approach.
/// <summary>
/// Validates the arguments and returns the batches produced by BatchImpl, stopping at the
/// first batch that turns out to be empty.
/// </summary>
/// <param name="source">The sequence to batch.</param>
/// <param name="size">The number of items per batch; must be positive.</param>
/// <returns>A lazy sequence of batches.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is zero or negative.</exception>
public static IEnumerable<IEnumerable<T>> Batch<T>(this IEnumerable<T> source, int size)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (size <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(size), "Size must be greater than zero.");
    }

    // The first empty batch marks the end of the sequence of batches.
    IEnumerable<IEnumerable<T>> batches = BatchImpl(source, size);
    return batches.TakeWhile(batch => batch.Any());
}
// Yields one GetBatch view per group number (1, 2, 3, ...) until a view reports, through
// the dispose callback, that the source enumerator has run out.
// All views share the same enumerator and the same "values" buffer of items read so far.
static IEnumerable<IEnumerable<T>> BatchImpl<T>(this IEnumerable<T> source, int size)
{
var values = new List<T>();
var group = 1;
var disposed = false;
var e = source.GetEnumerator();
try
{
while (!disposed)
{
// The callback disposes the enumerator and flips "disposed", which ends this loop
// the next time its condition is evaluated.
yield return GetBatch(e, values, group, size, () => { e.Dispose(); disposed = true; });
group++;
}
}
finally
{
// Covers the case where iteration stops before any batch hit the end of the source.
if (!disposed)
e.Dispose();
}
}
// Yields the items of batch number "group" (1-based). Items are served from the shared
// "values" buffer when already read, otherwise pulled from "e" and appended to the buffer.
// Calls "dispose" when the enumerator has no item left for a position in this batch.
static IEnumerable<T> GetBatch<T>(IEnumerator<T> e, List<T> values, int group, int size, Action dispose)
{
// 1-based positions of this batch's first and last item within the whole source.
var min = (group - 1) * size + 1;
var max = group * size;
var hasValue = false;
// Read ahead until the buffer reaches this batch's first position, or the source ends.
while (values.Count < min && e.MoveNext())
{
values.Add(e.Current);
}
for (var i = min; i <= max; i++)
{
if (i <= values.Count)
{
// Position i is already buffered.
hasValue = true;
}
// Assignment, not comparison: hasValue records whether another item could be read.
else if (hasValue = e.MoveNext())
{
values.Add(e.Current);
}
else
{
// Source exhausted; hasValue is false here, so the batch ends just below.
dispose();
}
if (hasValue)
yield return values[i - 1];
else
yield break;
}
}
}
Can work with infinite generators:
a.Zip(a.Skip(1), (x, y) => Enumerable.Repeat(x, 1).Concat(Enumerable.Repeat(y, 1)))
.Zip(a.Skip(2), (xy, z) => xy.Concat(Enumerable.Repeat(z, 1)))
.Where((x, i) => i % 3 == 0)
Demo code: https://ideone.com/GKmL7M
using System;
using System.Collections.Generic;
using System.Linq;
// Console demo: prints each input, then every complete run of three consecutive items.
public class Test
{
    // Writes the sequence on one line, then one line per non-overlapping triple, then a blank line.
    private static void DoIt(IEnumerable<int> a)
    {
        Console.WriteLine(String.Join(" ", a));

        // Pair each item with its successor, then extend every pair with the item after that.
        var pairs = a.Zip(a.Skip(1), (x, y) => Enumerable.Repeat(x, 1).Concat(Enumerable.Repeat(y, 1)));
        var triples = pairs.Zip(a.Skip(2), (xy, z) => xy.Concat(Enumerable.Repeat(z, 1)));

        // Sliding windows overlap; keeping every third one leaves the disjoint triples.
        var disjoint = triples.Where((triple, i) => i % 3 == 0);

        foreach (var triple in disjoint)
        {
            Console.WriteLine(String.Join(" ", triple));
        }

        Console.WriteLine();
    }

    public static void Main()
    {
        int[][] samples =
        {
            new int[] {1},
            new int[] {1, 2},
            new int[] {1, 2, 3},
            new int[] {1, 2, 3, 4},
            new int[] {1, 2, 3, 4, 5},
            new int[] {1, 2, 3, 4, 5, 6},
        };

        foreach (int[] sample in samples)
        {
            DoIt(sample);
        }
    }
}
1
1 2
1 2 3
1 2 3
1 2 3 4
1 2 3
1 2 3 4 5
1 2 3
1 2 3 4 5 6
1 2 3
4 5 6
But actually I would prefer to write corresponding method without linq.
I'm currently in the process of writing a class that can represent an infinitely large number (in theory). The constructor of this class creates the object from a string value, which is why the number could be of an extremely large, yet unknown, size.
The reason I started writing this class was because I wanted to be able to make a program that would be able to perform mathematical calculations with numbers of arbitrarily large size. Thus, I started writing a class that could handle values well over the standard ranges of integers, floats, doubles, (hopefully) decimals, etc.
Here are the declarations and the main constructor for the class:
/// <summary>
/// Creates a new instance of the LargeDecimal class, which represents either a whole or decimal number.
/// </summary>
/// <param name="value">
/// The string representation of the number. Spaces are ignored; a '-' in the first
/// position marks the number as negative.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
/// <exception cref="FormatException">
/// A character of the whole or fractional part cannot be parsed as a digit.
/// </exception>
public LargeDecimal(string value)
{
    if (value == null) throw new ArgumentNullException(nameof(value));

    string number = value.Replace(" ", "");
    if (number.IndexOf('-') == 0)
    {
        number = number.Replace("-", "");
        IsNegative = true;
    }

    // Everything before the first '.' is the whole part; what follows it (if anything)
    // is the fractional part.
    string[] numArray = number.Split('.');
    string wholePart = numArray[0];
    string fractionPart = numArray.Length > 1 ? numArray[1] : "";

    // Parse the fractional digits first so that the "is it really a whole number?"
    // decision can be made without summing digits (a sum could overflow an int).
    var fractionDigits = new List<int>(fractionPart.Length);
    foreach (char digit in fractionPart)
        fractionDigits.Add(int.Parse(digit.ToString()));

    foreach (char digit in wholePart)
        wholeDigits.Add(int.Parse(digit.ToString()));

    // A fraction made only of zeroes (or no fraction at all) still is a whole number;
    // in that case no decimal digits are stored.
    IsWhole = fractionDigits.TrueForAll(d => d == 0);
    if (!IsWhole)
        decimalDigits.AddRange(fractionDigits);
}
The class is basically a representation of a number through two lists of type int, where one list represents the digits that make up the whole partition of the number, and the other list represents the digits that make up the decimal partition of the number (if applicable).
I have written an Add method which accepts two LargeDecimal objects, adds their values together, and returns a new LargeDecimal object with the sum as its value. Though incomplete, it does work with LargeDecimal objects that are whole numbers only, and are both positive or both negative (picture!).
I have realized that adding methods to compare two values (greater than / less than / equal to) would be extremely useful in calculations. However, I am not sure how to check whether the value of a LargeDecimal object is greater or less than the value of another LargeDecimal.
There are cases where I can just compare the amount of items in the wholeDigits list, but that is only when the amounts of items are different for both values.
I am unsure about how to compare two numbers such as: 15498765423654973246 and 15499111137583924246.
And I think it will get more difficult if I will try and compare two fractional numbers: 8573819351.86931 and 8573809999.85999
I do not wish to use integer calculations in conjunction with place values (e.g. in the number 831, the value of the digit 8 would be 8 * 100, the value of 3 would be 3 * 10, and the value of 1 would be 1 * 1), because I would like this class to be able to represent values of any given size, length and range (whereas an int cannot handle values above 2147483647).
Any help regarding this would be highly appreciated! Thank you all!
I would start by implementing IComparable:
public class LargeDecimal : IComparable<LargeDecimal>
And the implementation would look like:
/// <summary>
/// Compares this number with <paramref name="other"/>.
/// </summary>
/// <param name="other">The number to compare with; null sorts before every instance.</param>
/// <returns>
/// 1 when this instance is greater than <paramref name="other"/>, -1 when it is smaller,
/// 0 when both represent the same value.
/// </returns>
public int CompareTo(LargeDecimal other)
{
    // ReferenceEquals keeps the null test independent of the overloaded == operator.
    if (ReferenceEquals(other, null)) return 1;
    if (ReferenceEquals(this, other)) return 0;

    // Different signs: the non-negative operand is the larger one.
    if (IsNegative != other.IsNegative)
        return other.IsNegative ? 1 : -1;

    // Same sign: compare magnitudes and flip the answer for negative numbers.
    int multiplier = IsNegative ? -1 : 1;

    // More whole digits means a larger magnitude.
    if (wholeDigits.Count != other.wholeDigits.Count)
        return (wholeDigits.Count > other.wholeDigits.Count ? 1 : -1) * multiplier;

    // Same number of whole digits: the first differing digit (most significant first) decides.
    for (int i = 0; i < wholeDigits.Count; i++)
    {
        if (wholeDigits[i] != other.wholeDigits[i])
            return (wholeDigits[i] > other.wholeDigits[i] ? 1 : -1) * multiplier;
    }

    // Fractional digits: a missing digit counts as 0, so that e.g. 1.5 and 1.50
    // compare as equal instead of the longer list winning.
    int fractionLength = Math.Max(decimalDigits.Count, other.decimalDigits.Count);
    for (int i = 0; i < fractionLength; i++)
    {
        int mine = i < decimalDigits.Count ? decimalDigits[i] : 0;
        int theirs = i < other.decimalDigits.Count ? other.decimalDigits[i] : 0;
        if (mine != theirs)
            return (mine > theirs ? 1 : -1) * multiplier;
    }

    return 0;
}
Update
This project was sitting on my brain at dinner tonight, so I went at it some more for fun. Not sure if this is helpful, but figured I'd share what I came up with.
First, I added fields to make the class actually work:
// True when the number is negative; only settable from inside the class.
public bool IsNegative { get; private set; }
// True when the number has no fractional part; only settable from inside the class.
public bool IsWhole { get; private set; }
// Digits of the whole part, one list element per digit.
private List<int> wholeDigits;
// Digits of the fractional part, one list element per digit.
private List<int> decimalDigits;
Second, I overrode the ToString method so the numbers display nicely:
/// <summary>
/// Formats the number as optional '-' sign, whole digits and, unless the number is
/// whole, a '.' followed by the decimal digits.
/// </summary>
public override string ToString()
{
    string sign = IsNegative ? "-" : "";
    string whole = string.Join("", wholeDigits);

    // The fractional digits are only rendered for non-whole numbers.
    string fraction = IsWhole ? "" : "." + string.Join("", decimalDigits);

    return sign + whole + fraction;
}
Then I implemented the Equals methods so they work as expected for a number type:
/// <summary>
/// Null-safe equality: two nulls are equal, a null never equals an instance,
/// otherwise the instance comparison decides.
/// </summary>
public static bool Equals(LargeDecimal first, LargeDecimal second)
{
    if (ReferenceEquals(first, null))
    {
        return ReferenceEquals(second, null);
    }

    return first.Equals(second);
}
/// <summary>
/// Compares with an arbitrary object; anything that is not a LargeDecimal is
/// passed on as null to the typed overload.
/// </summary>
public override bool Equals(object obj)
{
    LargeDecimal candidate = obj as LargeDecimal;
    return Equals(candidate);
}
/// <summary>Two numbers are equal exactly when CompareTo reports 0.</summary>
protected bool Equals(LargeDecimal other)
{
    int ordering = CompareTo(other);
    return ordering == 0;
}
/// <summary>
/// Computes a hash code from the sign and the digit values.
/// </summary>
/// <remarks>
/// The hash is built from the digits themselves rather than from the list objects:
/// List&lt;int&gt;.GetHashCode() is reference based, so two instances holding the same
/// number would otherwise get different hash codes although Equals reports them equal.
/// Trailing zeroes of the fractional part do not contribute, so values that differ only
/// in such padding hash alike. The instance is mutable; do not change it while it is
/// used as a key in a hash-based collection.
/// </remarks>
public override int GetHashCode()
{
    unchecked
    {
        int hashCode = IsNegative.GetHashCode();

        if (wholeDigits != null)
        {
            foreach (int digit in wholeDigits)
            {
                hashCode = (hashCode * 397) ^ digit;
            }
        }

        // Separator so that digits moving across the decimal point change the hash.
        hashCode = (hashCode * 397) ^ -1;

        if (decimalDigits != null)
        {
            // Ignore trailing zeroes of the fraction.
            int significant = decimalDigits.Count;
            while (significant > 0 && decimalDigits[significant - 1] == 0)
            {
                significant--;
            }

            for (int i = 0; i < significant; i++)
            {
                hashCode = (hashCode * 397) ^ decimalDigits[i];
            }
        }

        return hashCode;
    }
}
Next, I added some utility methods to help out with some upcoming tasks:
/// <summary>
/// Turns this instance into the value zero: non-negative, whole, with a single 0
/// digit in both digit lists.
/// </summary>
private void ResetToZero()
{
    IsNegative = false;
    IsWhole = true;

    decimalDigits = new List<int> { 0 };
    wholeDigits = new List<int> { 0 };
}
/// <summary>
/// Strips leading zeroes from the whole digits and trailing zeroes from the decimal
/// digits, then recomputes IsWhole from what is left of the decimal digits.
/// </summary>
private void NormalizeLists()
{
    RemoveLeadingZeroes(wholeDigits);
    RemoveTrailingZeroes(decimalDigits);

    // The number is whole when the fraction is empty or consists of a single 0.
    bool noFraction = decimalDigits.Count == 0;
    bool singleZero = decimalDigits.Count == 1 && decimalDigits[0] == 0;
    IsWhole = noFraction || singleZero;
}
/// <summary>
/// Inserts <paramref name="numberOfZeroes"/> zero digits at the front of
/// <paramref name="list"/>. Does nothing for a null list or a non-positive count.
/// </summary>
private void AddLeadingZeroes(List<int> list, int numberOfZeroes)
{
    if (list == null)
    {
        return;
    }

    for (int remaining = numberOfZeroes; remaining > 0; remaining--)
    {
        list.Insert(0, 0);
    }
}
/// <summary>
/// Appends <paramref name="numberOfZeroes"/> zero digits to the end of
/// <paramref name="list"/>. Does nothing for a null list or a non-positive count.
/// </summary>
private void AddTrailingZeroes(List<int> list, int numberOfZeroes)
{
    if (list == null)
    {
        return;
    }

    for (int remaining = numberOfZeroes; remaining > 0; remaining--)
    {
        list.Add(0);
    }
}
/// <summary>
/// Removes every zero at the front of <paramref name="list"/>, stopping at the first
/// non-zero digit. Zeroes after that digit are never touched.
/// </summary>
/// <param name="list">The digit list to trim in place; null is ignored.</param>
/// <param name="leaveOneIfEmpty">
/// When true and trimming leaves the list empty, a single 0 is put back.
/// </param>
private void RemoveLeadingZeroes(List<int> list, bool leaveOneIfEmpty = true)
{
    if (list == null) return;

    // Count the leading zeroes first and remove them in one go. Removing at index i
    // while also incrementing i skips elements: it leaves leading zeroes behind and
    // can delete zeroes from the middle of the number.
    int zeroCount = 0;
    while (zeroCount < list.Count && list[zeroCount] == 0)
    {
        zeroCount++;
    }
    list.RemoveRange(0, zeroCount);

    if (leaveOneIfEmpty && list.Count == 0) list.Add(0);
}
/// <summary>
/// Removes every zero at the end of <paramref name="list"/>, stopping at the last
/// non-zero digit.
/// </summary>
/// <param name="list">The digit list to trim in place; null is ignored.</param>
/// <param name="leaveOneIfEmpty">
/// When true and trimming leaves the list empty, a single 0 is put back.
/// </param>
private void RemoveTrailingZeroes(List<int> list, bool leaveOneIfEmpty = true)
{
    if (list == null)
    {
        return;
    }

    // Keep dropping the last element for as long as it is a zero.
    while (list.Count > 0 && list[list.Count - 1] == 0)
    {
        list.RemoveAt(list.Count - 1);
    }

    if (leaveOneIfEmpty && list.Count == 0)
    {
        list.Add(0);
    }
}
Next, I added some constructors. A default that sets the number to '0', one that parses a string, and another that copies the values from another LargeDecimal:
/// <summary>Creates an instance by delegating to the string constructor with the text "0".</summary>
public LargeDecimal() : this("0") { }
/// <summary>
/// Creates an instance from the textual representation of a number.
/// </summary>
/// <param name="value">
/// The number as text. Spaces are ignored, a '-' in the first position marks the number
/// as negative, and a '.' separates the whole digits from the decimal digits.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
/// <exception cref="FormatException">
/// A character of the whole or fractional part cannot be parsed as a digit.
/// </exception>
public LargeDecimal(string value)
{
    if (value == null) throw new ArgumentNullException("value");

    string number = value.Replace(" ", "");              // remove spaces
    IsNegative = (number.IndexOf('-') == 0);             // check for negative
    number = number.Replace("-", "").TrimStart('0');     // remove dashes, then leading zeroes

    // Trimming can leave nothing before the decimal point, or nothing at all for inputs
    // such as "0", "000" or "". Put a single 0 back so a whole part always exists.
    if (number.Length == 0 || number[0] == '.') number = "0" + number;

    // Initialize lists
    wholeDigits = new List<int>();
    decimalDigits = new List<int>();

    // Get whole and decimal parts of the number
    var numberParts = number.Split(new[] {'.'}, StringSplitOptions.RemoveEmptyEntries);

    // Add whole digits to the list
    wholeDigits.AddRange(numberParts[0].Select(n => int.Parse(n.ToString())));

    // Add decimal digits to the list (if there are any); trailing zeroes carry no value
    if (numberParts.Length > 1)
    {
        string fraction = numberParts[1].TrimEnd('0');
        decimalDigits.AddRange(fraction.Select(n => int.Parse(n.ToString())));
    }

    // Also derives IsWhole from the decimal digits.
    NormalizeLists();

    // Zero has no sign: "-0" must compare equal to "0".
    if (IsWhole && wholeDigits.Count == 1 && wholeDigits[0] == 0) IsNegative = false;
}
/// <summary>
/// Copy constructor: takes independent copies of both digit lists and the two flags
/// of <paramref name="initializeFrom"/>, then normalizes the copy.
/// </summary>
public LargeDecimal(LargeDecimal initializeFrom)
{
    List<int> sourceWhole = initializeFrom.wholeDigits;
    wholeDigits = sourceWhole.GetRange(0, sourceWhole.Count);

    List<int> sourceDecimal = initializeFrom.decimalDigits;
    decimalDigits = sourceDecimal.GetRange(0, sourceDecimal.Count);

    IsWhole = initializeFrom.IsWhole;
    IsNegative = initializeFrom.IsNegative;

    NormalizeLists();
}
Then I implemented the Add and Subtract methods
/// <summary>
/// Adds <paramref name="other"/> to this instance in place.
/// </summary>
/// <param name="other">The number to add; null leaves this instance unchanged.</param>
public void Add(LargeDecimal other)
{
    if (other == null) return;

    if (IsNegative != other.IsNegative)
    {
        // Get the absolute values of the two operands
        var absThis = new LargeDecimal(this) {IsNegative = false};
        var absOther = new LargeDecimal(other) {IsNegative = false};

        // If the signs are different and the values are the same, reset to 0.
        if (absThis == absOther)
        {
            ResetToZero();
            return;
        }

        // Since the signs are different, we will retain the sign of the larger number
        IsNegative = absThis < absOther ? other.IsNegative : IsNegative;

        // Assign the difference of the two absolute values
        absThis.Subtract(absOther);
        wholeDigits = absThis.wholeDigits.GetRange(0, absThis.wholeDigits.Count);
        decimalDigits = absThis.decimalDigits.GetRange(0, absThis.decimalDigits.Count);
        NormalizeLists();
        return;
    }

    // Same sign: add the magnitudes digit by digit, right to left.
    int carry = 0; // Represents the value of the 'tens' digit to carry over

    // Fractional digits line up at index 0 (the digit right after the decimal point);
    // a digit missing from the shorter list counts as 0.
    int fractionLength = Math.Max(decimalDigits.Count, other.decimalDigits.Count);
    var newDecimalDigits = new List<int>(new int[fractionLength]);
    for (int i = fractionLength - 1; i >= 0; i--)
    {
        int mine = i < decimalDigits.Count ? decimalDigits[i] : 0;
        int theirs = i < other.decimalDigits.Count ? other.decimalDigits[i] : 0;
        int sum = mine + theirs + carry;
        newDecimalDigits[i] = sum % 10;
        carry = sum / 10;
    }

    // Whole digits line up at the END of the lists (the units digit), so walk both
    // lists by their distance from the end. Aligning them at index 0 would add the
    // most significant digits of numbers of different lengths to each other, and
    // the carry has to travel through all digits of the longer number.
    int wholeLength = Math.Max(wholeDigits.Count, other.wholeDigits.Count);
    var newWholeDigits = new List<int>(new int[wholeLength]);
    for (int offset = 1; offset <= wholeLength; offset++)
    {
        int mine = offset <= wholeDigits.Count ? wholeDigits[wholeDigits.Count - offset] : 0;
        int theirs = offset <= other.wholeDigits.Count ? other.wholeDigits[other.wholeDigits.Count - offset] : 0;
        int sum = mine + theirs + carry;
        newWholeDigits[wholeLength - offset] = sum % 10;
        carry = sum / 10;
    }
    if (carry > 0) newWholeDigits.Insert(0, carry);

    wholeDigits = newWholeDigits;
    decimalDigits = newDecimalDigits;
    NormalizeLists();
}
/// <summary>
/// Subtracts <paramref name="other"/> from this instance in place.
/// </summary>
/// <param name="other">The number to subtract; null leaves this instance unchanged.</param>
public void Subtract(LargeDecimal other)
{
if (other == null) return;
// If the other value is the same as this one, then the difference is zero
if (Equals(other))
{
ResetToZero();
return;
}
// Absolute values will be used to determine how we subtract
var absThis = new LargeDecimal(this) {IsNegative = false};
var absOther = new LargeDecimal(other) {IsNegative = false};
// If the signs are different, then the difference will be the sum
// of the absolute values; the sign of this instance is left as it is.
if (IsNegative != other.IsNegative)
{
absThis.Add(absOther);
wholeDigits = absThis.wholeDigits.GetRange(0, absThis.wholeDigits.Count);
decimalDigits = absThis.decimalDigits.GetRange(0, absThis.decimalDigits.Count);
NormalizeLists();
return;
}
// Subtract smallNumber from bigNumber to get the difference
// (both are working copies, so the digit edits below do not touch the operands)
LargeDecimal bigNumber;
LargeDecimal smallNumber;
if (absThis < absOther)
{
bigNumber = new LargeDecimal(absOther);
smallNumber = new LargeDecimal(absThis);
}
else
{
bigNumber = new LargeDecimal(absThis);
smallNumber = new LargeDecimal(absOther);
}
// Pad the whole number and decimal number lists where necessary so that both
// LargeDecimal objects have the same count of whole and decimal numbers.
AddTrailingZeroes(
bigNumber.decimalDigits.Count < smallNumber.decimalDigits.Count
? bigNumber.decimalDigits
: smallNumber.decimalDigits,
Math.Abs(bigNumber.decimalDigits.Count - smallNumber.decimalDigits.Count));
AddLeadingZeroes(smallNumber.wholeDigits,
Math.Abs(bigNumber.wholeDigits.Count - smallNumber.wholeDigits.Count));
var newWholeDigits = new List<int>();
var newDecimalDigits = new List<int>();
bool borrowed = false; // True if we borrowed 1 from next number
// Decimal digits first, from the last digit towards the decimal point
for (int i = bigNumber.decimalDigits.Count - 1; i >= 0; i--)
{
if (borrowed)
{
bigNumber.decimalDigits[i] -= 1; // We borrowed one from this number last time
borrowed = false;
}
if (bigNumber.decimalDigits[i] < smallNumber.decimalDigits[i])
{
bigNumber.decimalDigits[i] += 10; // Borrow from next number and add to this one
borrowed = true;
}
// Since we're working from the back of the list, always add to the front
newDecimalDigits.Insert(0, bigNumber.decimalDigits[i] - smallNumber.decimalDigits[i]);
}
// Then the whole digits; a borrow left over from the decimal digits carries into this loop
for (int i = bigNumber.wholeDigits.Count - 1; i >= 0; i--)
{
if (borrowed)
{
bigNumber.wholeDigits[i] -= 1;
borrowed = false;
}
if (bigNumber.wholeDigits[i] < smallNumber.wholeDigits[i])
{
bigNumber.wholeDigits[i] += 10;
borrowed = true;
}
newWholeDigits.Insert(0, bigNumber.wholeDigits[i] - smallNumber.wholeDigits[i]);
}
// The operands were swapped above when |this| < |other|, so the sign flips in that case
if (absThis < absOther) IsNegative = !IsNegative;
wholeDigits = newWholeDigits.GetRange(0, newWholeDigits.Count);
decimalDigits = newDecimalDigits.GetRange(0, newDecimalDigits.Count);
NormalizeLists();
}
And finally overrode the numeric operators:
/// <summary>
/// Returns the sum of both operands as a new instance. When one operand is null the
/// other operand itself is returned.
/// </summary>
public static LargeDecimal operator +(LargeDecimal first, LargeDecimal second)
{
    if (first == null || second == null)
    {
        return first == null ? second : first;
    }

    var sum = new LargeDecimal(first);
    sum.Add(second);
    return sum;
}
/// <summary>
/// Returns <paramref name="first"/> minus <paramref name="second"/> as a new instance.
/// </summary>
/// <remarks>
/// A null operand is treated as zero, matching the + operator (where null + x is x):
/// x - null is x, and null - x is the negation of x rather than x itself.
/// Two null operands yield null.
/// </remarks>
public static LargeDecimal operator -(LargeDecimal first, LargeDecimal second)
{
    if (second == null) return first;

    if (first == null)
    {
        // 0 - second: copy the value and flip its sign (zero stays non-negative).
        var negated = new LargeDecimal(second);
        bool isZero = negated.wholeDigits.TrueForAll(d => d == 0)
                      && negated.decimalDigits.TrueForAll(d => d == 0);
        negated.IsNegative = !isZero && !second.IsNegative;
        return negated;
    }

    var result = new LargeDecimal(first);
    result.Subtract(second);
    return result;
}
/// <summary>True when <paramref name="first"/> is not null and compares greater than <paramref name="second"/>.</summary>
public static bool operator >(LargeDecimal first, LargeDecimal second)
    => !(first == null) && first.CompareTo(second) > 0;
/// <summary>True when <paramref name="second"/> is not null and compares greater than <paramref name="first"/>.</summary>
public static bool operator <(LargeDecimal first, LargeDecimal second)
    => !(second == null) && second.CompareTo(first) > 0;
/// <summary>True when <paramref name="first"/> is not null and compares greater than or equal to <paramref name="second"/>.</summary>
public static bool operator >=(LargeDecimal first, LargeDecimal second)
    => !(first == null) && first.CompareTo(second) >= 0;
/// <summary>True when <paramref name="second"/> is not null and compares greater than or equal to <paramref name="first"/>.</summary>
public static bool operator <=(LargeDecimal first, LargeDecimal second)
    => !(second == null) && second.CompareTo(first) >= 0;
/// <summary>Value equality, delegated to the static two-argument Equals.</summary>
public static bool operator ==(LargeDecimal first, LargeDecimal second)
    => Equals(first, second);
/// <summary>Value inequality: the negation of the static two-argument Equals.</summary>
public static bool operator !=(LargeDecimal first, LargeDecimal second)
    => Equals(first, second) == false;
Thanks for the fun challenge!!
Assuming that this implementation looks something like this:
List<int> WholeList;
List<int> FactionalList;
bool IsNegative;
and they both grow away from the decimal point, then a comparison algorithm would go like this
First compare signs. Negative is always less than positive.
Compare lengths of WholeList, longer has larger magnitude (larger number is dependent on sign)
If the WholeList counts are the same, compare each digit starting with the most significant (i.e. WholeList[Count-1] first); the first digit that differs between the numbers determines which has the larger magnitude.
If you make it into the FractionalList and then run out of digits in one list, the number with the longer FractionalList has the larger magnitude.
Is there any way I can separate a List<SomeObject> into several separate lists of SomeObject, using the item index as the delimiter of each split?
Let me exemplify:
I have a List<SomeObject> and I need a List<List<SomeObject>> or List<SomeObject>[], so that each of these resulting lists will contain a group of 3 items of the original list (sequentially).
eg.:
Original List: [a, g, e, w, p, s, q, f, x, y, i, m, c]
Resulting lists: [a, g, e], [w, p, s], [q, f, x], [y, i, m], [c]
I'd also need the resulting lists size to be a parameter of this function.
Try the following code.
/// <summary>
/// Splits <paramref name="source"/> into consecutive lists of at most
/// <paramref name="size"/> items; only the last list may be shorter.
/// </summary>
/// <param name="source">The items to split.</param>
/// <param name="size">Maximum number of items per list. Defaults to 3.</param>
/// <returns>The lists, in source order; empty when <paramref name="source"/> is empty.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
public static List<List<T>> Split<T>(IList<T> source, int size = 3)
{
    if (source == null) throw new ArgumentNullException(nameof(source));
    if (size < 1) throw new ArgumentOutOfRangeException(nameof(size), "size must be at least 1.");

    var chunks = new List<List<T>>();
    List<T> current = null;
    for (int i = 0; i < source.Count; i++)
    {
        // Every size-th index opens a new chunk.
        if (i % size == 0)
        {
            current = new List<T>(Math.Min(size, source.Count - i));
            chunks.Add(current);
        }
        current.Add(source[i]);
    }
    return chunks;
}
The idea is to first group the elements by indexes. Dividing by three has the effect of grouping them into groups of 3. Then convert each group to a list and the IEnumerable of List to a List of Lists
I just wrote this, and I think it's a little more elegant than the other proposed solutions:
/// <summary>
/// Break a list of items into chunks of a specific size
/// </summary>
/// <param name="source">The items to break up.</param>
/// <param name="chunksize">Maximum number of items per chunk; must be at least 1.</param>
/// <returns>Lazy chunks; each chunk re-enumerates <paramref name="source"/> when iterated.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="chunksize"/> is less than 1. Without this check the loop below never
/// advances and never ends.
/// </exception>
/// <remarks>As this is an iterator, the argument checks run when enumeration starts.</remarks>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source, int chunksize)
{
    if (source == null) throw new ArgumentNullException(nameof(source));
    if (chunksize < 1) throw new ArgumentOutOfRangeException(nameof(chunksize), "chunksize must be at least 1.");

    while (source.Any())
    {
        yield return source.Take(chunksize);
        // Continue with whatever follows the chunk just handed out.
        source = source.Skip(chunksize);
    }
}
In general the approach suggested by CaseyB works fine, in fact if you are passing in a List<T> it is hard to fault it, perhaps I would change it to:
/// <summary>
/// Breaks <paramref name="source"/> into lazy chunks of <paramref name="chunksize"/> items.
/// Each chunk is expressed as Skip(offset).Take(chunksize) on the original source.
/// </summary>
public static IEnumerable<IEnumerable<T>> ChunkTrivialBetter<T>(this IEnumerable<T> source, int chunksize)
{
    for (int offset = 0; source.Skip(offset).Any(); offset += chunksize)
    {
        yield return source.Skip(offset).Take(chunksize);
    }
}
Which will avoid massive call chains. Nonetheless, this approach has a general flaw. It materializes two enumerations per chunk, to highlight the issue try running:
foreach (var item in Enumerable.Range(1, int.MaxValue).Chunk(8).Skip(100000).First())
{
Console.WriteLine(item);
}
// wait forever
To overcome this we can try Cameron's approach, which passes the above test with flying colors as it only walks the enumeration once.
The trouble is that it has a different flaw: it materializes every item in each chunk, so memory usage can become very high.
To illustrate that try running:
foreach (var item in Enumerable.Range(1, int.MaxValue)
.Select(x => x + new string('x', 100000))
.Clump(10000).Skip(100).First())
{
Console.Write('.');
}
// OutOfMemoryException
Finally, any implementation should be able to handle out of order iteration of chunks, for example:
Enumerable.Range(1,3).Chunk(2).Reverse().ToArray()
// should return [3],[1,2]
Many highly optimal solutions like my first revision of this answer failed there. The same issue can be seen in casperOne's optimized answer.
To address all these issues you can use the following:
namespace ChunkedEnumerator
{
public static class Extensions
{
// One chunk of the source: exposes up to chunkSize items starting at position
// `start`, reading them through the shared EnumeratorWrapper.
class ChunkedEnumerable<T> : IEnumerable<T>
{
// Enumerator over a single chunk. It holds a reference on the shared wrapper
// from construction until Dispose.
class ChildEnumerator : IEnumerator<T>
{
ChunkedEnumerable<T> parent;
// Index inside the chunk; -1 until the first MoveNext.
int position;
// Set once the chunk is exhausted, the source ended, or Dispose was called.
bool done = false;
T current;
public ChildEnumerator(ChunkedEnumerable<T> parent)
{
this.parent = parent;
position = -1;
parent.wrapper.AddRef();
}
// Throws InvalidOperationException before the first MoveNext and after the end.
public T Current
{
get
{
if (position == -1 || done)
{
throw new InvalidOperationException();
}
return current;
}
}
// Releases this enumerator's reference on the wrapper (at most once).
public void Dispose()
{
if (!done)
{
done = true;
parent.wrapper.RemoveRef();
}
}
object System.Collections.IEnumerator.Current
{
get { return Current; }
}
// Advances inside the chunk; the item is fetched from the wrapper at the
// absolute position (position + parent.start).
public bool MoveNext()
{
position++;
if (position + 1 > parent.chunkSize)
{
done = true;
}
if (!done)
{
done = !parent.wrapper.Get(position + parent.start, out current);
}
return !done;
}
public void Reset()
{
// per http://msdn.microsoft.com/en-us/library/system.collections.ienumerator.reset.aspx
throw new NotSupportedException();
}
}
EnumeratorWrapper<T> wrapper;
int chunkSize;
int start;
public ChunkedEnumerable(EnumeratorWrapper<T> wrapper, int chunkSize, int start)
{
this.wrapper = wrapper;
this.chunkSize = chunkSize;
this.start = start;
}
public IEnumerator<T> GetEnumerator()
{
return new ChildEnumerator(this);
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
// Gives positional access (Get) on top of a forward-only enumeration of the
// source, and disposes the underlying enumerator when the last reference is removed.
class EnumeratorWrapper<T>
{
public EnumeratorWrapper (IEnumerable<T> source)
{
SourceEumerable = source;
}
IEnumerable<T> SourceEumerable {get; set;}
// The enumeration currently in progress; null when none has been started
// or the previous one was discarded.
Enumeration currentEnumeration;
// State of one pass over the source.
class Enumeration
{
public IEnumerator<T> Source { get; set; }
public int Position { get; set; }
public bool AtEnd { get; set; }
}
// Fetches the item at absolute position `pos`. Returns false (and default(T))
// when the source ends before that position is reached.
public bool Get(int pos, out T item)
{
// The enumeration is already past pos: discard it so a fresh one is started below.
if (currentEnumeration != null && currentEnumeration.Position > pos)
{
currentEnumeration.Source.Dispose();
currentEnumeration = null;
}
if (currentEnumeration == null)
{
currentEnumeration = new Enumeration { Position = -1, Source = SourceEumerable.GetEnumerator(), AtEnd = false };
}
item = default(T);
if (currentEnumeration.AtEnd)
{
return false;
}
// Step forward until the requested position is reached.
while(currentEnumeration.Position < pos)
{
currentEnumeration.AtEnd = !currentEnumeration.Source.MoveNext();
currentEnumeration.Position++;
if (currentEnumeration.AtEnd)
{
return false;
}
}
item = currentEnumeration.Source.Current;
return true;
}
int refs = 0;
// needed for dispose semantics
public void AddRef()
{
refs++;
}
// Drops one reference; when the count reaches zero the current enumeration
// (if any) is disposed.
public void RemoveRef()
{
refs--;
if (refs == 0 && currentEnumeration != null)
{
var copy = currentEnumeration;
currentEnumeration = null;
copy.Source.Dispose();
}
}
}
/// <summary>
/// Splits <paramref name="source"/> into chunks of <paramref name="chunksize"/> items.
/// All chunks read through one shared EnumeratorWrapper; this iterator holds a
/// reference on it while it is being enumerated.
/// </summary>
/// <exception cref="InvalidOperationException"><paramref name="chunksize"/> is less than 1.</exception>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source, int chunksize)
{
    if (chunksize < 1)
    {
        throw new InvalidOperationException();
    }

    var wrapper = new EnumeratorWrapper<T>(source);
    try
    {
        wrapper.AddRef();

        // A chunk exists for as long as the source has an item at the chunk's start.
        T unused;
        for (int chunkStart = 0; wrapper.Get(chunkStart, out unused); chunkStart += chunksize)
        {
            yield return new ChunkedEnumerable<T>(wrapper, chunksize, chunkStart);
        }
    }
    finally
    {
        wrapper.RemoveRef();
    }
}
}
/// <summary>Console demo for the Chunk extension method.</summary>
class Program
{
    static void Main(string[] args)
    {
        // Print the first 11 chunks of three, starting deep inside a very long sequence.
        int chunksLeft = 11;
        foreach (var group in Enumerable.Range(1, int.MaxValue).Skip(10000000).Chunk(3))
        {
            foreach (var n in group)
            {
                Console.Write(n + " ");
            }
            Console.WriteLine();

            chunksLeft--;
            if (chunksLeft == 0)
            {
                break;
            }
        }

        // Chunks may be enumerated out of order.
        var stuffs = Enumerable.Range(1, 10).Chunk(2).ToArray();
        for (int idx = 3; idx >= 1; idx--)
        {
            Console.Write("idx " + idx + " ");
            foreach (var n in stuffs[idx])
            {
                Console.Write(n + " ");
            }
            Console.WriteLine();
        }
        /*
        10000001 10000002 10000003
        10000004 10000005 10000006
        10000007 10000008 10000009
        10000010 10000011 10000012
        10000013 10000014 10000015
        10000016 10000017 10000018
        10000019 10000020 10000021
        10000022 10000023 10000024
        10000025 10000026 10000027
        10000028 10000029 10000030
        10000031 10000032 10000033
        idx 3 7 8
        idx 2 5 6
        idx 1 3 4
        */
        Console.ReadKey();
    }
}
}
There is also a round of optimisations you could introduce for out-of-order iteration of chunks, which is out of scope here.
As to which method you should choose? It totally depends on the problem you are trying to solve. If you are not concerned with the first flaw the simple answer is incredibly appealing.
Note that, as with most of these methods, this is not safe for multithreading; things can get weird. If you wish to make it thread-safe you would need to amend EnumeratorWrapper.
You could use a number of queries that use Take and Skip, but that would add too many iterations on the original list, I believe.
Rather, I think you should create an iterator of your own, like so:
/// <summary>
/// Walks <paramref name="enumerable"/> once and yields its items in lists of
/// <paramref name="groupSize"/>; a final shorter list holds any remainder.
/// </summary>
public static IEnumerable<IEnumerable<T>> GetEnumerableOfEnumerables<T>(
    IEnumerable<T> enumerable, int groupSize)
{
    var batch = new List<T>(groupSize);

    using (IEnumerator<T> cursor = enumerable.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            batch.Add(cursor.Current);
            if (batch.Count != groupSize)
            {
                continue;
            }

            // The batch is full: hand it out and start a new one.
            yield return batch;
            batch = new List<T>(groupSize);
        }
    }

    // Whatever did not fill a whole batch.
    if (batch.Count > 0)
    {
        yield return batch;
    }
}
You can then call this and it is LINQ enabled so you can perform other operations on the resulting sequences.
In light of Sam's answer, I felt there was an easier way to do this without:
Iterating through the list again (which I didn't do originally)
Materializing the items in groups before releasing the chunk (for large chunks of items, there would be memory issues)
All of the code that Sam posted
That said, here's another pass, which I've codified in an extension method to IEnumerable<T> called Chunk:
/// <summary>
/// Validates the arguments eagerly, then hands over to the lazy ChunkInternal iterator.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is not positive.</exception>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source,
    int chunkSize)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (!(chunkSize > 0))
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize),
            "The chunkSize parameter must be a positive value.");
    }

    return ChunkInternal(source, chunkSize);
}
Nothing surprising up there, just basic error checking.
Moving on to ChunkInternal:
/// <summary>
/// Iterator behind Chunk: for as long as the source has another item, yields a chunk
/// sequence that continues reading from the same enumerator.
/// </summary>
private static IEnumerable<IEnumerable<T>> ChunkInternal<T>(
    this IEnumerable<T> source, int chunkSize)
{
    Debug.Assert(source != null);
    Debug.Assert(chunkSize > 0);

    // The enumerator is shared with every chunk and disposed when this iterator ends.
    using (IEnumerator<T> enumerator = source.GetEnumerator())
    {
        // MoveNext positions the enumerator on the first item of the next chunk.
        while (enumerator.MoveNext())
        {
            yield return ChunkSequence(enumerator, chunkSize);
        }
    }
}
Basically, it gets the IEnumerator<T> and manually iterates through each item. It checks to see if there are any items currently to be enumerated. After each chunk is enumerated through, if there aren't any items left, it breaks out.
Once it detects there are items in the sequence, it delegates the responsibility for the inner IEnumerable<T> implementation to ChunkSequence:
/// <summary>
/// Yields the enumerator's current item and then up to chunkSize - 1 further items.
/// Expects <paramref name="enumerator"/> to already be positioned on an item.
/// </summary>
private static IEnumerable<T> ChunkSequence<T>(IEnumerator<T> enumerator,
    int chunkSize)
{
    Debug.Assert(enumerator != null);
    Debug.Assert(chunkSize > 0);

    int yielded = 0;
    while (true)
    {
        yield return enumerator.Current;
        yielded++;

        // Stop when the chunk is full (without advancing) or the source has ended.
        if (yielded >= chunkSize || !enumerator.MoveNext())
        {
            yield break;
        }
    }
}
Since MoveNext was already called on the IEnumerator<T> passed to ChunkSequence, it yields the item returned by Current and then increments the count, making sure never to return more than chunkSize items and moving to the next item in the sequence after every iteration (but short-circuited if the number of items yielded exceeds the chunk size).
If there are no items left, then the ChunkInternal method will make another pass in the outer loop, but when MoveNext is called a second time, it will still return false, as per the documentation (emphasis mine):
If MoveNext passes the end of the collection, the enumerator is
positioned after the last element in the collection and MoveNext
returns false. When the enumerator is at this position, subsequent
calls to MoveNext also return false until Reset is called.
At this point, the loop will break, and the sequence of sequences will terminate.
This is a simple test:
/// <summary>Prints the letters of a sample string in groups of three, one group per line.</summary>
static void Main()
{
    string s = "agewpsqfxyimc";
    int count = 0;

    foreach (IEnumerable<char> g in s.Chunk(3))
    {
        // Group header with a 1-based running number.
        count++;
        Console.Write("Group: {0} - ", count);

        // The members of the group, each followed by ", ".
        foreach (char c in g)
        {
            Console.Write(c + ", ");
        }

        Console.WriteLine();
    }
}
Output:
Group: 1 - a, g, e,
Group: 2 - w, p, s,
Group: 3 - q, f, x,
Group: 4 - y, i, m,
Group: 5 - c,
An important note, this will not work if you don't drain the entire child sequence or break at any point in the parent sequence. This is an important caveat, but if your use case is that you will consume every element of the sequence of sequences, then this will work for you.
Additionally, it will do strange things if you play with the order, just as Sam's did at one point.
Ok, here's my take on it:
completely lazy: works on infinite enumerables
no intermediate copying/buffering
O(n) execution time
works also when inner sequences are only partially consumed
/// <summary>
/// Lazily splits <paramref name="enumerable"/> into chunks of at most
/// <paramref name="chunkSize"/> items, reading the source through a single enumerator.
/// </summary>
/// <exception cref="ArgumentException"><paramref name="chunkSize"/> is less than 1
/// (raised when enumeration starts, as this is an iterator).</exception>
public static IEnumerable<IEnumerable<T>> Chunks<T>(this IEnumerable<T> enumerable,
int chunkSize)
{
if (chunkSize < 1) throw new ArgumentException("chunkSize must be positive");
using (var e = enumerable.GetEnumerator())
// Each successful MoveNext here lands on the first item of the next chunk.
while (e.MoveNext())
{
var remaining = chunkSize; // elements remaining in the current chunk
// Shared by the chunk iterator and the drain loop below: both count down the same
// `remaining`, so together they advance past at most chunkSize items per chunk.
var innerMoveNext = new Func<bool>(() => --remaining > 0 && e.MoveNext());
yield return e.GetChunk(innerMoveNext);
while (innerMoveNext()) {/* discard elements skipped by inner iterator */}
}
}
/// <summary>
/// Yields the enumerator's current item, then keeps yielding the current item after
/// every call to <paramref name="innerMoveNext"/> that returns true.
/// </summary>
private static IEnumerable<T> GetChunk<T>(this IEnumerator<T> e,
    Func<bool> innerMoveNext)
{
    yield return e.Current;

    while (innerMoveNext())
    {
        yield return e.Current;
    }
}
Example Usage
var src = new [] {1, 2, 3, 4, 5, 6};
var c3 = src.Chunks(3); // {{1, 2, 3}, {4, 5, 6}};
var c4 = src.Chunks(4); // {{1, 2, 3, 4}, {5, 6}};
var sum = c3.Select(c => c.Sum()); // {6, 15}
var count = c3.Count(); // 2
var take2 = c3.Select(c => c.Take(2)); // {{1, 2}, {4, 5}}
Explanations
The code works by nesting two yield based iterators.
The outer iterator must keep track of how many elements have been effectively consumed by the inner (chunk) iterator. This is done by closing over remaining with innerMoveNext(). Unconsumed elements of a chunk are discarded before the next chunk is yielded by the outer iterator.
This is necessary because otherwise you get inconsistent results, when the inner enumerables are not (completely) consumed (e.g. c3.Count() would return 6).
Note: The answer has been updated to address the shortcomings pointed out by #aolszowka.
Update .NET 6.0
.NET 6.0 added a new native Chunk method to the System.Linq namespace:
public static System.Collections.Generic.IEnumerable<TSource[]> Chunk<TSource> (
this System.Collections.Generic.IEnumerable<TSource> source, int size);
Using this new method every chunk except the last will be of size size. The last chunk will contain the remaining elements and may be of a smaller size.
Here is an example:
var list = Enumerable.Range(1, 100);
var chunkSize = 10;
foreach(var chunk in list.Chunk(chunkSize)) //Returns a chunk with the correct size.
{
Parallel.ForEach(chunk, (item) =>
{
//Do something Parallel here.
Console.WriteLine(item);
});
}
You’re probably thinking, well why not use Skip and Take? Which is true, I think this is just a bit more concise and makes things just that little bit more readable.
completely lazy, no counting or copying:
/// <summary>
/// Lazy splitting of a sequence into consecutive sub-sequences, without counting or copying.
/// </summary>
public static class EnumerableExtensions
{
    /// <summary>
    /// Splits <paramref name="source"/> into consecutive sub-sequences of at most
    /// <paramref name="len"/> items.
    /// </summary>
    /// <remarks>
    /// All sub-sequences read from one shared enumerator, so each one has to be consumed
    /// completely, and in order, before moving on to the next.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="len"/> is less than 1. (A zero length used to be reported as an
    /// ArgumentNullException although no argument was null, and a negative length was
    /// not rejected at all.)
    /// </exception>
    public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> source, int len)
    {
        if (source == null)
            throw new ArgumentNullException(nameof(source));
        if (len < 1)
            throw new ArgumentOutOfRangeException(nameof(len), "len must be at least 1.");

        return SplitIterator(source, len);
    }

    // Iterator part of Split; kept separate so the argument checks above run immediately.
    private static IEnumerable<IEnumerable<T>> SplitIterator<T>(IEnumerable<T> source, int len)
    {
        // Dispose the enumerator when the outer sequence ends or is abandoned.
        using (IEnumerator<T> enumer = source.GetEnumerator())
        {
            while (enumer.MoveNext())
            {
                yield return Take(enumer.Current, enumer, len);
            }
        }
    }

    // Yields `head`, then up to len - 1 further items pulled from `tail`.
    private static IEnumerable<T> Take<T>(T head, IEnumerator<T> tail, int len)
    {
        while (true)
        {
            yield return head;
            if (--len == 0)
                break;
            if (tail.MoveNext())
                head = tail.Current;
            else
                break;
        }
    }
}
I think the following suggestion would be the fastest. I am sacrificing the laziness of the source Enumerable for the ability to use Array.Copy and for knowing ahead of time the length of each of my sublists.
/// <summary>
/// Materializes <paramref name="items"/> into an array (reusing it when it already is
/// one) and yields consecutive copies of at most <paramref name="size"/> elements.
/// </summary>
public static IEnumerable<T[]> Chunk<T>(this IEnumerable<T> items, int size)
{
    T[] all = items as T[];
    if (all == null)
    {
        all = items.ToArray();
    }

    int start = 0;
    while (start < all.Length)
    {
        // The last piece may be shorter than size.
        int length = Math.Min(size, all.Length - start);
        var piece = new T[length];
        Array.Copy(all, start, piece, 0, length);
        yield return piece;

        start += size;
    }
}
For anyone interested in a packaged/maintained solution, the MoreLINQ library provides the Batch extension method which matches your requested behavior:
IEnumerable<char> source = "Example string";
IEnumerable<IEnumerable<char>> chunksOfThreeChars = source.Batch(3);
The Batch implementation is similar to Cameron MacFarland's answer, with the addition of an overload for transforming the chunk/batch before returning, and performs quite well.
I wrote a Clump extension method several years ago. Works great, and is the fastest implementation here. :P
/// <summary>
/// Groups the items of a sequence into lots of a given size.
/// </summary>
/// <typeparam name="T">The item type.</typeparam>
/// <param name="source">The items to group.</param>
/// <param name="size">The maximum number of items per lot.</param>
/// <returns>The lots, none of which holds more than <paramref name="size"/> items.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
public static IEnumerable<IEnumerable<T>> Clump<T>(this IEnumerable<T> source, int size)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    if (!(size >= 1))
    {
        throw new ArgumentOutOfRangeException("size", "size must be greater than 0");
    }

    // Arguments are checked here, eagerly; the iterator does the lazy work.
    return ClumpIterator<T>(source, size);
}
private static IEnumerable<IEnumerable<T>> ClumpIterator<T>(IEnumerable<T> source, int size)
{
Debug.Assert(source != null, "source is null.");
T[] items = new T[size];
int count = 0;
foreach (var item in source)
{
items[count] = item;
count++;
if (count == size)
{
yield return items;
items = new T[size];
count = 0;
}
}
if (count > 0)
{
if (count == size)
yield return items;
else
{
T[] tempItems = new T[count];
Array.Copy(items, tempItems, count);
yield return tempItems;
}
}
}
We can improve #JaredPar's solution to do true lazy evaluation. We use a GroupAdjacentBy method that yields groups of consecutive elements with the same key:
sequence
.Select((x, i) => new { Value = x, Index = i })
.GroupAdjacentBy(x=>x.Index/3)
.Select(g=>g.Select(x=>x.Value))
Because the groups are yielded one-by-one, this solution works efficiently with long or infinite sequences.
System.Interactive provides Buffer() for this purpose. Some quick testing shows performance is similar to Sam's solution.
I find this little snippet does the job quite nicely.
/// <summary>
/// Splits <paramref name="source"/> into consecutive sub-lists of at most
/// <paramref name="chunkSize"/> elements using <c>List&lt;T&gt;.GetRange</c>.
/// </summary>
/// <param name="source">The list to split.</param>
/// <param name="chunkSize">Maximum number of elements per sub-list; must be at least 1.</param>
/// <returns>A lazily produced sequence of new lists (shallow copies of each range).</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is less than 1.</exception>
public static IEnumerable<List<T>> Chunked<T>(this List<T> source, int chunkSize)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }
    // chunkSize == 0 would never advance the offset and would loop forever.
    if (chunkSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), "chunkSize must be greater than 0");
    }
    return ChunkedIterator();

    // Separate iterator so the checks above fire at call time.
    IEnumerable<List<T>> ChunkedIterator()
    {
        var offset = 0;
        while (offset < source.Count)
        {
            var take = Math.Min(source.Count - offset, chunkSize);
            yield return source.GetRange(offset, take);
            // Advance by what was actually taken so the offset cannot overflow.
            offset += take;
        }
    }
}
Here's a list splitting routine I wrote a couple months ago:
/// <summary>
/// Groups the elements of <paramref name="theList"/> by <c>index / chunkSize</c>
/// and returns each group as its own list, in order.
/// </summary>
/// <param name="theList">The list to split.</param>
/// <param name="chunkSize">Number of elements per chunk (the divisor applied to each index).</param>
/// <returns>A fully materialized list of chunk lists.</returns>
public static List<List<T>> Chunk<T>(
    List<T> theList,
    int chunkSize
)
{
    // Tag every element with the number of the chunk its position falls into.
    var tagged = theList.Select(
        (element, position) => new { indexgroup = position / chunkSize, data = element });

    // Elements sharing a chunk number become one list.
    var grouped = tagged.GroupBy(entry => entry.indexgroup, entry => entry.data);

    return grouped.Select(bucket => new List<T>(bucket)).ToList();
}
We found David B's solution worked the best. But we adapted it to a more general solution:
list.GroupBy(item => item.SomeProperty)
.Select(group => new List<T>(group))
.ToArray();
What about this one?
var input = new List<string> { "a", "g", "e", "w", "p", "s", "q", "f", "x", "y", "i", "m", "c" };
// k is the chunk size.
var k = 3;
// (Count - 1) / k + 1 is the number of chunks, rounded up; the last range is clipped to what remains.
var res = Enumerable.Range(0, (input.Count - 1) / k + 1)
    .Select(i => input.GetRange(i * k, Math.Min(k, input.Count - i * k)))
    .ToList();
As far as I know, GetRange() is linear in terms of number of items taken. So this should perform well.
This is an old question but this is what I ended up with; it enumerates the enumerable only once, but does create lists for each of the partitions. It doesn't suffer from unexpected behavior when ToArray() is called as some of the implementations do:
/// <summary>
/// Splits <paramref name="source"/> into consecutive lists of at most
/// <paramref name="chunkSize"/> elements, enumerating the source only once.
/// </summary>
/// <param name="source">The sequence to partition.</param>
/// <param name="chunkSize">Maximum number of elements per partition; must be at least 1.</param>
/// <returns>A lazily produced sequence of lists; only the last one may be shorter.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="chunkSize"/> is less than 1.</exception>
public static IEnumerable<IEnumerable<T>> Partition<T>(IEnumerable<T> source, int chunkSize)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }
    if (chunkSize < 1)
    {
        throw new ArgumentException("Invalid chunkSize: " + chunkSize);
    }
    return PartitionIterator();

    // The iterator lives in a local function so the checks above throw at call
    // time instead of being deferred until the result is first enumerated.
    IEnumerable<IEnumerable<T>> PartitionIterator()
    {
        using (IEnumerator<T> sourceEnumerator = source.GetEnumerator())
        {
            IList<T> currentChunk = new List<T>();
            while (sourceEnumerator.MoveNext())
            {
                currentChunk.Add(sourceEnumerator.Current);
                if (currentChunk.Count == chunkSize)
                {
                    yield return currentChunk;
                    currentChunk = new List<T>();
                }
            }
            // Flush the final, partially filled chunk.
            if (currentChunk.Count > 0)
            {
                yield return currentChunk;
            }
        }
    }
}
Old code, but this is what I've been using:
/// <summary>
/// Lazily groups <paramref name="source"/> into lists holding
/// <paramref name="max"/> items each; a final shorter list carries any remainder.
/// </summary>
/// <param name="source">The sequence to group.</param>
/// <param name="max">Item count at which the current list is emitted; also used as each list's initial capacity.</param>
/// <returns>A lazily produced sequence of lists.</returns>
public static IEnumerable<List<T>> InSetsOf<T>(this IEnumerable<T> source, int max)
{
    List<T> bucket = new List<T>(max);
    using (IEnumerator<T> cursor = source.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            bucket.Add(cursor.Current);
            if (bucket.Count != max)
            {
                continue;
            }
            yield return bucket;
            bucket = new List<T>(max);
        }
    }
    // Whatever is left over forms the last, shorter set.
    if (bucket.Count > 0)
    {
        yield return bucket;
    }
}
This following solution is the most compact I could come up with that is O(n).
/// <summary>
/// Copies <paramref name="source"/> into consecutive arrays of at most
/// <paramref name="chunksize"/> elements.
/// </summary>
/// <param name="source">The sequence to split; used directly when it is already an <c>IList&lt;T&gt;</c>, otherwise copied to a list first.</param>
/// <param name="chunksize">Maximum number of elements per array.</param>
/// <returns>A lazily produced sequence of freshly allocated arrays.</returns>
public static IEnumerable<T[]> Chunk<T>(IEnumerable<T> source, int chunksize)
{
    IList<T> buffer = source as IList<T> ?? source.ToList();
    int start = 0;
    while (start < buffer.Count)
    {
        // The last chunk is clipped to the number of items that remain.
        int remaining = buffer.Count - start;
        int width = remaining < chunksize ? remaining : chunksize;
        T[] chunk = new T[width];
        for (int offset = 0; offset < chunk.Length; offset++)
        {
            chunk[offset] = buffer[start + offset];
        }
        yield return chunk;
        start += chunksize;
    }
}
If the list is of type system.collections.generic you can use the "CopyTo" method available to copy elements of your array to other sub arrays. You specify the start element and number of elements to copy.
You could also make 3 clones of your original list and use the "RemoveRange" on each list to shrink the list to the size you want.
Or just create a helper method to do it for you.
It's an old solution but I had a different approach. I use Skip to move to desired offset and Take to extract desired number of elements:
/// <summary>
/// Splits <paramref name="source"/> into chunks of <paramref name="chunkSize"/>
/// elements, each expressed as a deferred <c>Skip</c>/<c>Take</c> query over the source.
/// </summary>
/// <remarks>
/// The source is counted once up front and then re-enumerated from the start for
/// every chunk that is consumed, so this suits in-memory collections better than
/// expensive or one-shot sequences.
/// </remarks>
/// <param name="source">The sequence to split.</param>
/// <param name="chunkSize">Number of elements per chunk; must be greater than 0.</param>
/// <returns>A deferred sequence of deferred chunks.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is 0 or negative.</exception>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source,
    int chunkSize)
{
    if (source == null)
        throw new ArgumentNullException(nameof(source));
    // The one-string constructor treats its argument as the parameter name, so
    // pass the name and the message separately.
    if (chunkSize <= 0)
        throw new ArgumentOutOfRangeException(nameof(chunkSize), $"{nameof(chunkSize)} should be > 0");
    var nbChunks = (int)Math.Ceiling((double)source.Count() / chunkSize);
    return Enumerable.Range(0, nbChunks)
        .Select(chunkNb => source.Skip(chunkNb * chunkSize)
            .Take(chunkSize));
}
Another way is using Rx Buffer operator
//using System.Linq;
//using System.Reactive.Linq;
//using System.Reactive.Threading.Tasks;
var observableBatches = anAnumerable.ToObservable().Buffer(size);
var batches = aList.ToObservable().Buffer(size).ToList().ToTask().GetAwaiter().GetResult();
The question was how to "Split List into Sublists with LINQ", but sometimes you may want those sub-lists to be references to the original list, not copies. This allows you to modify the original list from the sub-lists. In that case, this may work for you.
/// <summary>
/// Splits <paramref name="array"/> into consecutive <c>Memory&lt;T&gt;</c> windows of at
/// most <paramref name="size"/> elements. The windows reference the original array,
/// so writes through a window are visible in <paramref name="array"/>.
/// </summary>
/// <param name="array">The array to view in chunks.</param>
/// <param name="size">Maximum number of elements per window; must be at least 1.</param>
/// <returns>A lazily produced sequence of windows; only the last may be shorter. Empty for an empty array.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
public static IEnumerable<Memory<T>> RefChunkBy<T>(this T[] array, int size)
{
    // Both exception types derive from ArgumentException, which is what this
    // method used to throw for either problem.
    if (array is null)
    {
        throw new ArgumentNullException(nameof(array));
    }
    if (size < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(size), "size must be positive");
    }
    return Windows();

    // Separate iterator so the checks above fire at call time.
    IEnumerable<Memory<T>> Windows()
    {
        int start = 0;
        while (start < array.Length)
        {
            int length = Math.Min(size, array.Length - start);
            yield return new Memory<T>(array, start, length);
            // Advancing by the window length keeps start <= array.Length (no overflow).
            start += length;
        }
    }
}
Usage:
var src = new[] { 1, 2, 3, 4, 5, 6 };
var c3 = RefChunkBy(src, 3); // {{1, 2, 3}, {4, 5, 6}};
var c4 = RefChunkBy(src, 4); // {{1, 2, 3, 4}, {5, 6}};
// as extension method
var c3 = src.RefChunkBy(3); // {{1, 2, 3}, {4, 5, 6}};
var c4 = src.RefChunkBy(4); // {{1, 2, 3, 4}, {5, 6}};
var sum = c3.Select(c => c.Span.ToArray().Sum()); // {6, 15}
var count = c3.Count(); // 2
var take2 = c3.Select(c => c.Span.ToArray().Take(2)); // {{1, 2}, {4, 5}}
Feel free to make this code better.
Using modular partitioning:
/// <summary>
/// Distributes the strings of <paramref name="input"/> over
/// <c>ceil(count / chunkSize)</c> buckets, choosing each string's bucket from its
/// hash code modulo the bucket count.
/// </summary>
/// <remarks>
/// Buckets are not guaranteed to hold exactly <paramref name="chunkSize"/> items;
/// equal strings always land in the same bucket. The input is re-enumerated once
/// per bucket that is consumed.
/// </remarks>
/// <param name="input">The strings to distribute.</param>
/// <param name="chunkSize">Target bucket size used to derive the bucket count; must be at least 1.</param>
/// <returns>A deferred sequence of deferred buckets.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is less than 1.</exception>
public IEnumerable<IEnumerable<string>> Split(IEnumerable<string> input, int chunkSize)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }
    if (chunkSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), "chunkSize must be greater than 0");
    }
    var chunks = (int)Math.Ceiling((double)input.Count() / (double)chunkSize);
    // GetHashCode() may be negative, and a negative remainder matches no bucket id,
    // which silently dropped those strings. Masking the sign bit keeps the bucket
    // unchanged for non-negative hashes. Null strings hash as 0.
    return Enumerable.Range(0, chunks)
        .Select(id => input.Where(s => ((s?.GetHashCode() ?? 0) & int.MaxValue) % chunks == id));
}
Just putting in my two cents. If you wanted to "bucket" the list (visualize left to right), you could do the following:
/// <summary>
/// Deals the elements of <paramref name="source"/> round-robin into
/// <paramref name="numberOfBuckets"/> lists: element <c>i</c> goes to bucket
/// <c>i % numberOfBuckets</c>.
/// </summary>
/// <param name="source">The list to distribute.</param>
/// <param name="numberOfBuckets">How many buckets to create.</param>
/// <returns>The buckets, in index order.</returns>
public static List<List<T>> Buckets<T>(this List<T> source, int numberOfBuckets)
{
    var buckets = new List<List<T>>();
    for (int created = 0; created < numberOfBuckets; created++)
    {
        buckets.Add(new List<T>());
    }

    // Walk the source left to right, cycling through the buckets.
    for (int position = 0; position < source.Count(); position++)
    {
        buckets[position % numberOfBuckets].Add(source[position]);
    }
    return buckets;
}
/// <summary>
/// Splits <paramref name="originalItemsList"/> into consecutive sub-lists of at
/// most <paramref name="number"/> items.
/// </summary>
/// <param name="originalItemsList">The list to split.</param>
/// <param name="number">Maximum number of items per sub-list; must be at least 1.</param>
/// <returns>The sub-lists in order; only the last may be shorter.</returns>
/// <exception cref="ArgumentNullException"><paramref name="originalItemsList"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="number"/> is less than 1.</exception>
public static List<List<T>> GetSplitItemsList<T>(List<T> originalItemsList, short number)
{
    if (originalItemsList == null)
    {
        throw new ArgumentNullException(nameof(originalItemsList));
    }
    // A step of zero or less would never reach the end of the list.
    if (number < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(number), "number must be greater than 0");
    }
    var listGroup = new List<List<T>>();
    for (int i = 0; i < originalItemsList.Count; i += number)
    {
        // GetRange copies just this window; Take(j).Skip(i) re-walked the list
        // from the start for every chunk.
        int take = Math.Min(number, originalItemsList.Count - i);
        listGroup.Add(originalItemsList.GetRange(i, take));
    }
    return listGroup;
}
To insert my two cents...
By using the list type for the source to be chunked, I found another very compact solution:
/// <summary>
/// Copies <paramref name="source"/> into a list and returns it in consecutive
/// chunks of <paramref name="chunkSize"/> items, followed by one final chunk with
/// whatever remains.
/// </summary>
/// <remarks>
/// The final chunk is always emitted, so an empty source produces a single empty chunk.
/// </remarks>
/// <param name="source">The sequence to split.</param>
/// <param name="chunkSize">Number of items per chunk; must be at least 1.</param>
/// <returns>A lazily produced sequence of lists.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is less than 1.</exception>
public static IEnumerable<IEnumerable<TSource>> Chunk<TSource>(this IEnumerable<TSource> source, int chunkSize)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }
    // chunkSize == 0 would never shrink the remainder and would loop forever.
    if (chunkSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(chunkSize), "chunkSize must be greater than 0");
    }
    return ChunkIterator();

    // Separate iterator so the checks above fire at call time.
    IEnumerable<IEnumerable<TSource>> ChunkIterator()
    {
        // copy the source into a list
        var chunkList = source.ToList();
        // return chunks of 'chunkSize' items; tracking an offset avoids shifting
        // the whole list with RemoveRange after every chunk
        var offset = 0;
        while (chunkList.Count - offset > chunkSize)
        {
            yield return chunkList.GetRange(offset, chunkSize);
            offset += chunkSize;
        }
        // return the rest
        yield return chunkList.GetRange(offset, chunkList.Count - offset);
    }
}
I took the primary answer and made it to be an IOC container to determine where to split. (For who is really looking to only split on 3 items, in reading this post while searching for an answer?)
This method allows one to split on any type of item as needed.
/// <summary>
/// Splits <paramref name="main"/> into runs, starting a new run at every item for
/// which <paramref name="splitOn"/> returns true.
/// </summary>
/// <param name="main">The list to split.</param>
/// <param name="splitOn">Called once per item, in order; a true result makes that item the first of a new run.</param>
/// <returns>The runs in order, each as its own list.</returns>
public static List<List<T>> SplitOn<T>(List<T> main, Func<T, bool> splitOn)
{
    int runNumber = 0;

    // Pair each item with the number of the run it belongs to; the counter is
    // bumped before pairing whenever the predicate fires.
    var numbered = main.Select(item =>
    {
        if (splitOn.Invoke(item))
        {
            runNumber++;
        }
        return new KeyValuePair<int, T>(runNumber, item);
    });

    return numbered
        .GroupBy(pair => pair.Key, pair => pair.Value)
        .Select(run => run.ToList())
        .ToList();
}
So for the OP the code would be
var it = new List<string>()
{ "a", "g", "e", "w", "p", "s", "q", "f", "x", "y", "i", "m", "c" };
int index = 0;
var result = SplitOn(it, (itm) => (index++ % 3) == 0 );
As performant as Sam Saffron's approach.
/// <summary>
/// Splits <paramref name="source"/> into lazily evaluated batches of up to
/// <paramref name="size"/> items, ending at the first batch that turns out to be empty.
/// </summary>
/// <param name="source">The sequence to batch.</param>
/// <param name="size">Maximum number of items per batch; must be greater than zero.</param>
/// <returns>A deferred sequence of deferred batches.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is zero or negative.</exception>
public static IEnumerable<IEnumerable<T>> Batch<T>(this IEnumerable<T> source, int size)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }
    if (size <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(size), "Size must be greater than zero.");
    }

    IEnumerable<IEnumerable<T>> batches = BatchImpl(source, size);
    return batches.TakeWhile(batch => batch.Any());
}
/// <summary>
/// Produces an open-ended stream of batch sequences over a single shared
/// enumerator of <paramref name="source"/>. Each batch is created by GetBatch and
/// is only evaluated when the caller enumerates it.
/// </summary>
/// <remarks>
/// The loop ends once the dispose callback handed to GetBatch has run, which sets
/// the <c>disposed</c> flag. Every item read is appended to the shared
/// <c>values</c> list, which is never cleared here.
/// </remarks>
static IEnumerable<IEnumerable<T>> BatchImpl<T>(this IEnumerable<T> source, int size)
{
    // Cache of every item read so far; shared by all batches.
    var values = new List<T>();
    // 1-based number of the batch about to be handed out.
    var group = 1;
    var disposed = false;
    var e = source.GetEnumerator();
    try
    {
        while (!disposed)
        {
            // The callback lets a batch dispose the enumerator and stop this loop.
            yield return GetBatch(e, values, group, size, () => { e.Dispose(); disposed = true; });
            group++;
        }
    }
    finally
    {
        // Runs when the outer enumeration ends or is abandoned; avoids a second Dispose.
        if (!disposed)
            e.Dispose();
    }
}
/// <summary>
/// Lazily yields the items of batch number <paramref name="group"/> (1-based),
/// i.e. the items at 1-based positions <c>(group - 1) * size + 1</c> through
/// <c>group * size</c>, reading from <paramref name="e"/> only for positions not
/// already present in <paramref name="values"/>.
/// </summary>
/// <param name="e">Shared source enumerator; advanced as needed.</param>
/// <param name="values">Shared cache of all items read so far; this method appends to it.</param>
/// <param name="group">1-based batch number.</param>
/// <param name="size">Number of positions per batch.</param>
/// <param name="dispose">Invoked when <c>e.MoveNext()</c> returns false inside the position loop.</param>
static IEnumerable<T> GetBatch<T>(IEnumerator<T> e, List<T> values, int group, int size, Action dispose)
{
    // 1-based first and last positions covered by this batch.
    var min = (group - 1) * size + 1;
    var max = group * size;
    var hasValue = false;
    // Catch up: read from the source until the cache reaches this batch's first
    // position (or the source ends), in case earlier batches were not fully enumerated.
    while (values.Count < min && e.MoveNext())
    {
        values.Add(e.Current);
    }
    for (var i = min; i <= max; i++)
    {
        if (i <= values.Count)
        {
            // Position already cached.
            hasValue = true;
        }
        else if (hasValue = e.MoveNext())
        {
            // Assignment (not comparison) is intentional: record whether an item was read.
            values.Add(e.Current);
        }
        else
        {
            // MoveNext returned false: signal the caller via the callback.
            dispose();
        }
        if (hasValue)
            yield return values[i - 1];
        else
            yield break;
    }
}
}
Can work with infinite generators:
a.Zip(a.Skip(1), (x, y) => Enumerable.Repeat(x, 1).Concat(Enumerable.Repeat(y, 1)))
.Zip(a.Skip(2), (xy, z) => xy.Concat(Enumerable.Repeat(z, 1)))
.Where((x, i) => i % 3 == 0)
Demo code: https://ideone.com/GKmL7M
using System;
using System.Collections.Generic;
using System.Linq;
/// <summary>
/// Demo program: prints each input sequence followed by the groups of three
/// produced by the Zip/Skip/Where combination.
/// </summary>
public class Test
{
    /// <summary>
    /// Writes <paramref name="a"/> on one line, then every group of three
    /// consecutive items taken at positions 0, 3, 6, ..., then a blank line.
    /// </summary>
    private static void DoIt(IEnumerable<int> a)
    {
        Console.WriteLine(String.Join(" ", a));

        // Sliding pairs (a[i], a[i+1]).
        var pairs = a.Zip(a.Skip(1), (x, y) => Enumerable.Repeat(x, 1).Concat(Enumerable.Repeat(y, 1)));
        // Sliding triples (a[i], a[i+1], a[i+2]).
        var triples = pairs.Zip(a.Skip(2), (xy, z) => xy.Concat(Enumerable.Repeat(z, 1)));
        // Keep every third window so the groups do not overlap.
        var groups = triples.Where((x, i) => i % 3 == 0);

        foreach (var group in groups)
        {
            Console.WriteLine(String.Join(" ", group));
        }
        Console.WriteLine();
    }

    /// <summary>Runs the demo for the sequences 1..1 through 1..6.</summary>
    public static void Main()
    {
        for (int length = 1; length <= 6; length++)
        {
            DoIt(Enumerable.Range(1, length).ToArray());
        }
    }
}
1
1 2
1 2 3
1 2 3
1 2 3 4
1 2 3
1 2 3 4 5
1 2 3
1 2 3 4 5 6
1 2 3
4 5 6
But actually I would prefer to write corresponding method without linq.
I have an array X of 10 elements. I would like to create a new array containing all the elements from X that begin at index 3 and ends in index 7. Sure I can easily write a loop that will do it for me but I would like to keep my code as clean as possible. Is there a method in C# that can do it for me?
Something like (pseudo code):
Array NewArray = oldArray.createNewArrayFromRange(int BeginIndex , int EndIndex)
Array.Copy doesn't fit my needs. I need the items in the new array to be clones. Array.copy is just a C-Style memcpy equivalent, it's not what I'm looking for.
You could add it as an extension method:
/// <summary>
/// Returns a new array holding <paramref name="length"/> elements of
/// <paramref name="data"/>, starting at <paramref name="index"/>.
/// </summary>
/// <param name="data">The array to copy from.</param>
/// <param name="index">Zero-based position of the first element to copy.</param>
/// <param name="length">Number of elements to copy.</param>
/// <returns>A freshly allocated array; elements are copied by <c>Array.Copy</c>, not cloned.</returns>
public static T[] SubArray<T>(this T[] data, int index, int length)
{
    var slice = new T[length];
    Array.Copy(
        sourceArray: data,
        sourceIndex: index,
        destinationArray: slice,
        destinationIndex: 0,
        length: length);
    return slice;
}
/// <summary>Usage example: extracts four elements starting at index 3.</summary>
static void Main()
{
    var data = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    var sub = data.SubArray(3, 4); // contains {3,4,5,6}
}
Update re cloning (which wasn't obvious in the original question). If you really want a deep clone; something like:
/// <summary>
/// Copies <paramref name="length"/> elements of <paramref name="data"/> starting
/// at <paramref name="index"/>, then returns a deep clone of that slice produced
/// by a BinaryFormatter serialize/deserialize round trip.
/// </summary>
/// <param name="data">The array to copy from.</param>
/// <param name="index">Zero-based position of the first element to copy.</param>
/// <param name="length">Number of elements to copy.</param>
/// <returns>The deserialized copy of the slice.</returns>
public static T[] SubArrayDeepClone<T>(this T[] data, int index, int length)
{
    // Take the slice first so only the requested range is serialized.
    T[] arrCopy = new T[length];
    Array.Copy(data, index, arrCopy, 0, length);
    using (MemoryStream ms = new MemoryStream())
    {
        // NOTE(review): BinaryFormatter is considered unsafe and has been removed
        // from recent .NET releases; prefer another serializer or an explicit
        // clone method in new code.
        var bf = new BinaryFormatter();
        bf.Serialize(ms, arrCopy);
        // Rewind so deserialization reads from the start of the stream.
        ms.Position = 0;
        return (T[])bf.Deserialize(ms);
    }
}
This does require the objects to be serializable ([Serializable] or ISerializable), though. You could easily substitute for any other serializer as appropriate - XmlSerializer, DataContractSerializer, protobuf-net, etc.
Note that deep clone is tricky without serialization; in particular, ICloneable is hard to trust in most cases.
You can use Array.Copy(...) to copy into the new array after you've created it, but I don't think there's a method which creates the new array and copies a range of elements.
If you're using .NET 3.5 you could use LINQ:
var newArray = array.Skip(3).Take(5).ToArray();
but that will be somewhat less efficient.
See this answer to a similar question for options for more specific situations.
Have you considered using ArraySegment?
http://msdn.microsoft.com/en-us/library/1hsbd92d.aspx
I see you want to do Cloning, not just copying references.
In this case you can use .Select to project array members to their clones.
For example, if your elements implemented ICloneable you could do something like this:
var newArray = array.Skip(3).Take(5).Select(eachElement => eachElement.Clone()).ToArray();
Note: This solution requires .NET Framework 3.5.
The following code does it in one line:
// Source array
string[] Source = new string[] { "A", "B", "C", "D" };
// Extracting a slice into another array
string[] Slice = new List<string>(Source).GetRange(2, 2).ToArray();
In C# 8, they've introduced a new Range and Index type, which can be used like this:
int[] a = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
Index i1 = 3; // number 3 from beginning
Index i2 = ^4; // number 4 from end
var slice = a[i1..i2]; // { 3, 4, 5 }
References:
https://learn.microsoft.com/en-us/dotnet/core/whats-new/dotnet-core-3-0#ranges-and-indices
https://devblogs.microsoft.com/dotnet/building-c-8-0/
string[] arr = { "Parrot" , "Snake" ,"Rabbit" , "Dog" , "cat" };
arr = arr.ToList().GetRange(0, arr.Length -1).ToArray();
Building on Marc's answer but adding the desired cloning behaviour
/// <summary>
/// Returns a new array whose elements are clones of <paramref name="length"/>
/// elements of <paramref name="data"/>, starting at <paramref name="index"/>.
/// </summary>
/// <param name="data">The array to copy from.</param>
/// <param name="index">Zero-based position of the first element to clone.</param>
/// <param name="length">Number of elements to clone.</param>
/// <returns>A new array; null source elements stay at their default value.</returns>
public static T[] CloneSubArray<T>(this T[] data, int index, int length)
    where T : ICloneable
{
    T[] result = new T[length];
    for (int i = 0; i < length; i++)
    {
        var original = data[index + i];
        // Null entries have nothing to clone; the slot keeps default(T).
        if (original != null)
            result[i] = (T)original.Clone();
    }
    // Return only after the whole range has been processed (the loop's closing
    // brace was missing, which left this statement inside the loop body).
    return result;
}
And if implementing ICloneable is too much like hard work a reflective one using Håvard Stranden’s Copyable library to do the heavy lifting required.
using OX.Copyable;
/// <summary>
/// Returns a new array whose elements are copies, made with the <c>Copy()</c>
/// extension from OX.Copyable, of <paramref name="length"/> elements of
/// <paramref name="data"/> starting at <paramref name="index"/>.
/// </summary>
/// <param name="data">The array to copy from.</param>
/// <param name="index">Zero-based position of the first element to copy.</param>
/// <param name="length">Number of elements to copy.</param>
/// <returns>A new array; null source elements stay at their default value.</returns>
public static T[] DeepCopySubArray<T>(
    this T[] data, int index, int length)
{
    T[] result = new T[length];
    for (int i = 0; i < length; i++)
    {
        var original = data[index + i];
        // Null entries have nothing to copy; the slot keeps default(T).
        if (original != null)
            result[i] = (T)original.Copy();
    }
    // Return only after the whole range has been processed (the loop's closing
    // brace was missing, which left this statement inside the loop body).
    return result;
}
Note that the OX.Copyable implementation works with any of:
For the automated copy to work, though, one of the following statements must hold for instance:
Its type must have a parameterless constructor, or
It must be a Copyable, or
It must have an IInstanceProvider registered for its type.
So this should cover almost any situation you have. If you are cloning objects where the sub graph contains things like db connections or file/stream handles you obviously have issues, but that is true for any generalized deep copy.
If you want to use some other deep copy approach instead this article lists several others so I would suggest not trying to write your own.
You can do this fairly easily:
object[] foo = new object[10];
object[] bar = new object[7];
Array.Copy(foo, 3, bar, 0, 7);
I think that the code you are looking for is:
// Read from oldArray starting at BeginIndex and write to the start of newArray.
// The count below treats EndIndex as exclusive; add 1 if EndIndex is meant to be inclusive.
Array.Copy(oldArray, BeginIndex, newArray, 0, EndIndex - BeginIndex);
As an alternative to copying the data you can make a wrapper that gives you access to a part of the original array as if it was a copy of the part of the array. The advantage is that you don't get another copy of the data in memory, and the drawback is a slight overhead when accessing the data.
/// <summary>
/// Read-only window onto part of an existing array. No elements are copied:
/// every access reads straight from the wrapped array.
/// </summary>
public class SubArray<T> : IEnumerable<T> {
    private readonly T[] _original;
    private readonly int _start;

    /// <summary>Creates a window of <paramref name="len"/> elements beginning at <paramref name="start"/>.</summary>
    public SubArray(T[] original, int start, int len) {
        Length = len;
        _start = start;
        _original = original;
    }

    /// <summary>Number of elements exposed by the window.</summary>
    public int Length { get; }

    /// <summary>Gets the element at <paramref name="index"/>, relative to the start of the window.</summary>
    /// <exception cref="IndexOutOfRangeException"><paramref name="index"/> is outside <c>0..Length-1</c>.</exception>
    public T this[int index] {
        get {
            bool inside = index >= 0 && index < Length;
            if (!inside) throw new IndexOutOfRangeException();
            return _original[_start + index];
        }
    }

    /// <summary>Enumerates the window's elements in order.</summary>
    public IEnumerator<T> GetEnumerator() {
        int offset = 0;
        while (offset < Length) {
            yield return _original[_start + offset];
            offset++;
        }
    }

    IEnumerator IEnumerable.GetEnumerator() {
        return GetEnumerator();
    }
}
Usage:
int[] original = { 1, 2, 3, 4, 5 };
SubArray<int> copy = new SubArray<int>(original, 2, 2);
Console.WriteLine(copy.Length); // shows: 2
Console.WriteLine(copy[0]); // shows: 3
foreach (int i in copy) Console.WriteLine(i); // shows 3 and 4
In C# 8.0, you can now do many fancier works including reverse indices and ranges like in Python, such as:
int[] list = {1, 2, 3, 4, 5, 6};
var list2 = list[2..5].Clone() as int[]; // 3, 4, 5
var list3 = list[..5].Clone() as int[]; // 1, 2, 3, 4, 5
var list4 = list[^4..^0].Clone() as int[]; // reverse index
Array.ConstrainedCopy will work.
public static void ConstrainedCopy (
Array sourceArray,
int sourceIndex,
Array destinationArray,
int destinationIndex,
int length
)
It does not meet your cloning requirement, but it seems simpler than many answers to do:
Array NewArray = new ArraySegment(oldArray,BeginIndex , int Count).ToArray();
There's no single method that will do what you want. You will need to make a clone method available for the class in your array. Then, if LINQ is an option:
Foo[] newArray = oldArray.Skip(3).Take(5).Select(item => item.Clone()).ToArray();
/// <summary>Example element type exposing a shallow <c>Clone</c> method.</summary>
class Foo
{
    /// <summary>Returns a shallow copy of this instance, made with <c>MemberwiseClone</c>.</summary>
    public Foo Clone() => (Foo)MemberwiseClone();
}
How about using Array.ConstrainedCopy:
int[] ArrayOne = new int[8] {1,2,3,4,5,6,7,8};
int[] ArrayTwo = new int[5];
Array.ConstrainedCopy(ArrayOne, 3, ArrayTwo, 0, 7-3);
Below is my original post. It will not work
You could use Array.CopyTo:
int[] ArrayOne = new int[8] {1,2,3,4,5,6,7,8};
int[] ArrayTwo = new int[5];
ArrayOne.CopyTo(ArrayTwo,3); //starts copy at index=3 until it reaches end of
//either array
How about this:
/// <summary>
/// Returns a new array containing clones of the elements of
/// <paramref name="array"/> from <paramref name="startIndex"/> (inclusive) to
/// <paramref name="endIndex"/> (exclusive). The source array is not modified.
/// </summary>
/// <param name="array">The array to clone from.</param>
/// <param name="startIndex">Zero-based index of the first element to clone.</param>
/// <param name="endIndex">Zero-based index one past the last element to clone.</param>
/// <returns>A new array of <c>endIndex - startIndex</c> elements; null source elements stay at their default value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
public T[] CloneCopy<T>(T[] array, int startIndex, int endIndex) where T : ICloneable
{
    if (array == null)
    {
        throw new ArgumentNullException(nameof(array));
    }
    T[] retArray = new T[endIndex - startIndex];
    for (int i = startIndex; i < endIndex; i++)
    {
        T original = array[i];
        // Write into the result (not back into the source) and cast, because
        // ICloneable.Clone() returns object.
        if (original != null)
        {
            retArray[i - startIndex] = (T)original.Clone();
        }
    }
    return retArray;
}
You then need to implement the ICloneable interface on all of the classes you need to use this on but that should do it.
I'm not sure how deep it really is, but:
MyArray.ToList<TSource>().GetRange(beginningIndex, endIndex).ToArray()
It's a bit of overhead, but it might cut out an unnecessary method.
As far as cloning goes, I don't think serialization calls your constructors. This may break class invariants if you're doing interesting things in the ctor's.
It seems the safer bet is virtual clone methods calling copy constructors.
protected MyDerivedClass(MyDerivedClass myClass)
{
...
}
/// <summary>Creates a copy of this instance by passing it to the copy constructor.</summary>
public override MyBaseClass Clone() => new MyDerivedClass(this);
Cloning elements in an array is not something that can be done in a universal way. Do you want deep cloning or a simple copy of all members?
Let's go for the "best effort" approach: cloning objects using the ICloneable interface or binary serialization:
/// <summary>
/// Array helpers that copy a range of elements, cloning each element on a
/// best-effort basis.
/// </summary>
public static class ArrayExtensions
{
    /// <summary>
    /// Returns a new array of <paramref name="length"/> slots filled with clones of
    /// the elements of <paramref name="array"/> starting at <paramref name="index"/>.
    /// Elements implementing <see cref="ICloneable"/> are cloned through it; all
    /// others go through a binary serialization round trip.
    /// </summary>
    /// <param name="array">The array to clone from.</param>
    /// <param name="index">Zero-based position of the first element to clone.</param>
    /// <param name="length">Size of the returned array.</param>
    /// <returns>
    /// The new array. If the range runs past the end of <paramref name="array"/>,
    /// the trailing slots keep their default value; null elements are also left
    /// as default.
    /// </returns>
    public static T[] SubArray<T>(this T[] array, int index, int length)
    {
        T[] result = new T[length];
        for (int i = index; i < length + index && i < array.Length; i++)
        {
            T element = array[i];
            // A null element has nothing to clone, and serializing null throws;
            // the result slot is already default(T).
            if (element == null)
            {
                continue;
            }

            var cloneable = element as ICloneable;
            if (cloneable != null)
                result[i - index] = (T)cloneable.Clone();
            else
                result[i - index] = (T)CloneObject(element);
        }
        return result;
    }

    /// <summary>
    /// Clones <paramref name="obj"/> by serializing it to memory and deserializing it again.
    /// </summary>
    private static object CloneObject(object obj)
    {
        // NOTE(review): BinaryFormatter is considered unsafe and has been removed
        // from recent .NET releases; swap in another serializer for new code.
        BinaryFormatter formatter = new BinaryFormatter();
        using (MemoryStream stream = new MemoryStream())
        {
            formatter.Serialize(stream, obj);
            // Rewind so deserialization reads from the start of the stream.
            stream.Seek(0, SeekOrigin.Begin);
            return formatter.Deserialize(stream);
        }
    }
}
This is not a perfect solution, because there simply is none that will work for any type of object.
You can take class made by Microsoft:
/// <summary>
/// Minimal hash set built on two parallel arrays: <c>_buckets</c> maps a hash
/// remainder to the head of a chain, and <c>_slots</c> stores the entries, linked
/// through <c>Slot.Next</c>.
/// </summary>
internal class Set<TElement>
{
    // Per bucket: index of the chain's first slot plus one, so 0 means "empty bucket".
    private int[] _buckets;
    // Entry storage; chains and the free list both run through Slot.Next.
    private Slot[] _slots;
    // Number of slots handed out from the end of _slots so far; Remove does not decrement it.
    private int _count;
    // Index of the first reusable (removed) slot, or -1 when there is none.
    private int _freeList;
    private readonly IEqualityComparer<TElement> _comparer;
    /// <summary>Creates a set that uses the default equality comparer.</summary>
    public Set()
        : this(null)
    {
    }
    /// <summary>Creates a set using <paramref name="comparer"/>, or the default comparer when it is null.</summary>
    public Set(IEqualityComparer<TElement> comparer)
    {
        if (comparer == null)
            comparer = EqualityComparer<TElement>.Default;
        _comparer = comparer;
        _buckets = new int[7];
        _slots = new Slot[7];
        _freeList = -1;
    }
    /// <summary>Adds <paramref name="value"/>; returns true if it was not already present.</summary>
    public bool Add(TElement value)
    {
        return !Find(value, true);
    }
    /// <summary>Returns true if <paramref name="value"/> is in the set.</summary>
    public bool Contains(TElement value)
    {
        return Find(value, false);
    }
    /// <summary>Removes <paramref name="value"/>; returns true if it was present.</summary>
    public bool Remove(TElement value)
    {
        var hashCode = InternalGetHashCode(value);
        var index1 = hashCode % _buckets.Length;
        // index2 trails the walk: the previous slot in the chain, or -1 at the head.
        var index2 = -1;
        for (var index3 = _buckets[index1] - 1; index3 >= 0; index3 = _slots[index3].Next)
        {
            if (_slots[index3].HashCode == hashCode && _comparer.Equals(_slots[index3].Value, value))
            {
                // Unlink the slot from its chain.
                if (index2 < 0)
                    _buckets[index1] = _slots[index3].Next + 1;
                else
                    _slots[index2].Next = _slots[index3].Next;
                // Clear the slot and push it onto the free list. Real hashes are
                // masked non-negative, so -1 never matches a lookup.
                _slots[index3].HashCode = -1;
                _slots[index3].Value = default(TElement);
                _slots[index3].Next = _freeList;
                _freeList = index3;
                return true;
            }
            index2 = index3;
        }
        return false;
    }
    /// <summary>
    /// Looks up <paramref name="value"/>. Returns true if it is already present;
    /// otherwise returns false, after inserting it when <paramref name="add"/> is true.
    /// </summary>
    private bool Find(TElement value, bool add)
    {
        var hashCode = InternalGetHashCode(value);
        for (var index = _buckets[hashCode % _buckets.Length] - 1; index >= 0; index = _slots[index].Next)
        {
            if (_slots[index].HashCode == hashCode && _comparer.Equals(_slots[index].Value, value))
                return true;
        }
        if (add)
        {
            int index1;
            if (_freeList >= 0)
            {
                // Reuse a previously removed slot.
                index1 = _freeList;
                _freeList = _slots[index1].Next;
            }
            else
            {
                // Take the next unused slot, growing the arrays first when full.
                if (_count == _slots.Length)
                    Resize();
                index1 = _count;
                ++_count;
            }
            // Computed after a possible Resize, so it uses the current bucket count.
            int index2 = hashCode % _buckets.Length;
            _slots[index1].HashCode = hashCode;
            _slots[index1].Value = value;
            // Insert at the head of the bucket's chain.
            _slots[index1].Next = _buckets[index2] - 1;
            _buckets[index2] = index1 + 1;
        }
        return false;
    }
    /// <summary>Grows both arrays to <c>_count * 2 + 1</c> and rebuilds every chain.</summary>
    private void Resize()
    {
        // checked: throws on arithmetic overflow instead of wrapping.
        var length = checked(_count * 2 + 1);
        var numArray = new int[length];
        var slotArray = new Slot[length];
        Array.Copy(_slots, 0, slotArray, 0, _count);
        // Re-link each copied slot into the bucket for the new length.
        for (var index1 = 0; index1 < _count; ++index1)
        {
            int index2 = slotArray[index1].HashCode % length;
            slotArray[index1].Next = numArray[index2] - 1;
            numArray[index2] = index1 + 1;
        }
        _buckets = numArray;
        _slots = slotArray;
    }
    /// <summary>Returns the comparer's hash for <paramref name="value"/> with the sign bit cleared, or 0 for null.</summary>
    internal int InternalGetHashCode(TElement value)
    {
        if (value != null)
            return _comparer.GetHashCode(value) & int.MaxValue;
        return 0;
    }
    /// <summary>One stored entry plus its chain link.</summary>
    internal struct Slot
    {
        // Masked hash of Value, or -1 once the slot has been removed.
        internal int HashCode;
        internal TElement Value;
        // Index of the next slot in the same chain (or free list); -1 ends the chain.
        internal int Next;
    }
}
and then
/// <summary>
/// Returns the tail of <paramref name="first"/> beginning at the first element
/// that also occurs in <paramref name="second"/>.
/// </summary>
/// <param name="first">The array to take the tail from.</param>
/// <param name="second">The elements to look for.</param>
/// <returns>A new array with the tail, or an empty array when the two share no element.</returns>
public static T[] GetSub<T>(this T[] first, T[] second)
{
    // Run the deferred intersection once; Any() followed by First() rebuilt it twice.
    var firstMatch = IntersectIteratorWithIndex(first, second).FirstOrDefault();
    if (firstMatch == null) return new T[] { };
    var index = firstMatch.Item2;
    var length = first.Length - index;
    var subArray = new T[length];
    Array.Copy(first, index, subArray, 0, length);
    return subArray;
}
/// <summary>
/// Lazily yields each distinct element of <paramref name="first"/> that also
/// occurs in <paramref name="second"/>, paired with the index of its first
/// occurrence in <paramref name="first"/>.
/// </summary>
/// <param name="first">The sequence whose order and indices are reported.</param>
/// <param name="second">The elements to intersect with.</param>
private static IEnumerable<Tuple<T, Int32>> IntersectIteratorWithIndex<T>(IEnumerable<T> first, IEnumerable<T> second)
{
    var firstList = first.ToList();
    var set = new Set<T>();
    foreach (var i in second)
        set.Add(i);
    for (var position = 0; position < firstList.Count; position++)
    {
        var item = firstList[position];
        // Remove succeeds only the first time a value is seen, so the current
        // position is its first index; no IndexOf scan is needed.
        if (set.Remove(item))
            yield return new Tuple<T, Int32>(item, position);
    }
}
This is the optimal way, I found, to do this:
/// <summary>
/// Demonstrates <c>ArraySegment&lt;T&gt;</c>: prints the underlying array, the
/// segment's offset and count, and then the elements inside the segment.
/// </summary>
private void GetSubArrayThroughArraySegment() {
    int[] array = { 10, 20, 30 };
    ArraySegment<int> segment = new ArraySegment<int>(array, 1, 2);
    Console.WriteLine("-- Array --");
    int[] original = segment.Array;
    foreach (int value in original)
    {
        Console.WriteLine(value);
    }
    Console.WriteLine("-- Offset --");
    Console.WriteLine(segment.Offset);
    Console.WriteLine("-- Count --");
    Console.WriteLine(segment.Count);
    Console.WriteLine("-- Range --");
    // The segment covers [Offset, Offset + Count). Comparing against Count alone
    // only happened to work for offset 1, count 2.
    int end = segment.Offset + segment.Count;
    for (int i = segment.Offset; i < end; i++)
    {
        Console.WriteLine(segment.Array[i]);
    }
}
Hope It Helps!
Use an extension method:
/// <summary>
/// Returns a new array with the elements of <paramref name="source"/> from
/// <paramref name="start"/> (inclusive) to <paramref name="end"/> (exclusive).
/// A negative <paramref name="end"/> counts back from the end of the array.
/// </summary>
/// <param name="source">The array to slice.</param>
/// <param name="start">Zero-based index of the first element to include.</param>
/// <param name="end">Index one past the last element to include, or a negative offset from the array's length.</param>
/// <returns>A freshly allocated array of <c>end - start</c> elements.</returns>
public static T[] Slice<T>(this T[] source, int start, int end)
{
    // Translate a negative end into an absolute position.
    int stop = end < 0 ? source.Length + end : end;
    int count = stop - start;

    T[] slice = new T[count];
    int offset = 0;
    while (offset < count)
    {
        slice[offset] = source[offset + start];
        offset++;
    }
    return slice;
}
and you can use it
var NewArray = OldArray.Slice(3,7);
Code from the System.Private.CoreLib.dll:
/// <summary>
/// Returns a new array containing the elements of <paramref name="array"/>
/// selected by <paramref name="range"/>.
/// </summary>
/// <param name="array">The array to copy from; a null value is reported through ThrowHelper.</param>
/// <param name="range">The range to extract, resolved against <c>array.Length</c>.</param>
/// <returns>The copied elements; <c>Array.Empty&lt;T&gt;()</c> on the first path when the range is empty.</returns>
public static T[] GetSubArray<T>(T[] array, Range range)
{
    if (array == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array);
    }
    // Resolve the range (which may use from-end indices) to a concrete offset and length.
    (int Offset, int Length) offsetAndLength = range.GetOffsetAndLength(array.Length);
    // NOTE(review): item/item2/array2/array3 look like decompiler-generated names;
    // item is the offset and item2 the length.
    int item = offsetAndLength.Offset;
    int item2 = offsetAndLength.Length;
    // Taken when default(T) is non-null or the array's runtime type is exactly T[].
    if (default(T) != null || typeof(T[]) == array.GetType())
    {
        if (item2 == 0)
        {
            return Array.Empty<T>();
        }
        T[] array2 = new T[item2];
        // Raw copy of item2 elements, starting at element item of the source.
        Buffer.Memmove(ref Unsafe.As<byte, T>(ref array2.GetRawSzArrayData()), ref Unsafe.Add(ref Unsafe.As<byte, T>(ref array.GetRawSzArrayData()), item), (uint)item2);
        return array2;
    }
    // Otherwise the runtime element type differs from T (presumably a covariant
    // array - confirm): allocate with the actual element type.
    T[] array3 = (T[])Array.CreateInstance(array.GetType().GetElementType(), item2);
    Array.Copy(array, item, array3, 0, item2);
    return array3;
}
array1 = [5,6,7,8];
int[] array2 = new int[2];
Array.ConstrainedCopy(array1, 1, array2, 0, 2);
array2 = [6,7];
Array.ConstrainedCopy takes five (5) parameters:
source array (sourceArray)
starting index of source array (sourceIndex)
destination array (destinationArray)
starting index of destination array (destinationIndex)
number of elements to copy (length)
/// <summary>
/// Returns up to <paramref name="length"/> elements of <paramref name="data"/>
/// starting at <paramref name="index"/>, clipped to the end of the array.
/// </summary>
/// <param name="data">The array to copy from; may be null.</param>
/// <param name="index">Zero-based position of the first element to copy.</param>
/// <param name="length">Maximum number of elements to copy.</param>
/// <returns>
/// A new array with the copied elements. Empty when <paramref name="data"/> is
/// null or empty, when <paramref name="index"/> is outside the array, or when
/// <paramref name="length"/> is zero or negative.
/// </returns>
public static T[] SubArray<T>(T[] data, int index, int length)
{
    // A non-positive length used to fall through and return everything from
    // index to the end, because the element counter never equalled it.
    if (data == null || index < 0 || index >= data.Length || length <= 0)
    {
        return new T[0];
    }

    int count = Math.Min(length, data.Length - index);
    T[] result = new T[count];
    Array.Copy(data, index, result, 0, count);
    return result;
}