Related
Anyone have a good resource or provide a sample of a natural order sort in C# for an FileInfo array? I am implementing the IComparer interface in my sorts.
The easiest thing to do is just P/Invoke the built-in function in Windows, and use it as the comparison function in your IComparer:
[DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
private static extern int StrCmpLogicalW(string psz1, string psz2);
Michael Kaplan has some examples of how this function works here, and the changes that were made for Vista to make it work more intuitively. The plus side of this function is that it will have the same behaviour as the version of Windows it runs on, however this does mean that it differs between versions of Windows so you need to consider whether this is a problem for you.
So a complete implementation would be something like:
[SuppressUnmanagedCodeSecurity]
internal static class SafeNativeMethods
{
[DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
public static extern int StrCmpLogicalW(string psz1, string psz2);
}
public sealed class NaturalStringComparer : IComparer<string>
{
public int Compare(string a, string b)
{
return SafeNativeMethods.StrCmpLogicalW(a, b);
}
}
public sealed class NaturalFileInfoNameComparer : IComparer<FileInfo>
{
public int Compare(FileInfo a, FileInfo b)
{
return SafeNativeMethods.StrCmpLogicalW(a.Name, b.Name);
}
}
Just thought I'd add to this (with the most concise solution I could find):
public static IOrderedEnumerable<T> OrderByAlphaNumeric<T>(this IEnumerable<T> source, Func<T, string> selector)
{
int max = source
.SelectMany(i => Regex.Matches(selector(i), #"\d+").Cast<Match>().Select(m => (int?)m.Value.Length))
.Max() ?? 0;
return source.OrderBy(i => Regex.Replace(selector(i), #"\d+", m => m.Value.PadLeft(max, '0')));
}
The above pads any numbers in the string to the max length of all numbers in all strings and uses the resulting string to sort.
The cast to (int?) is to allow for collections of strings without any numbers (.Max() on an empty enumerable throws an InvalidOperationException).
None of the existing implementations looked great so I wrote my own. The results are almost identical to the sorting used by modern versions of Windows Explorer (Windows 7/8). The only differences I've seen are 1) although Windows used to (e.g. XP) handle numbers of any length, it's now limited to 19 digits - mine is unlimited, 2) Windows gives inconsistent results with certain sets of Unicode digits - mine works fine (although it doesn't numerically compare digits from surrogate pairs; nor does Windows), and 3) mine can't distinguish different types of non-primary sort weights if they occur in different sections (e.g. "e-1é" vs "é1e-" - the sections before and after the number have diacritic and punctuation weight differences).
public static int CompareNatural(string strA, string strB) {
return CompareNatural(strA, strB, CultureInfo.CurrentCulture, CompareOptions.IgnoreCase);
}
public static int CompareNatural(string strA, string strB, CultureInfo culture, CompareOptions options) {
CompareInfo cmp = culture.CompareInfo;
int iA = 0;
int iB = 0;
int softResult = 0;
int softResultWeight = 0;
while (iA < strA.Length && iB < strB.Length) {
bool isDigitA = Char.IsDigit(strA[iA]);
bool isDigitB = Char.IsDigit(strB[iB]);
if (isDigitA != isDigitB) {
return cmp.Compare(strA, iA, strB, iB, options);
}
else if (!isDigitA && !isDigitB) {
int jA = iA + 1;
int jB = iB + 1;
while (jA < strA.Length && !Char.IsDigit(strA[jA])) jA++;
while (jB < strB.Length && !Char.IsDigit(strB[jB])) jB++;
int cmpResult = cmp.Compare(strA, iA, jA - iA, strB, iB, jB - iB, options);
if (cmpResult != 0) {
// Certain strings may be considered different due to "soft" differences that are
// ignored if more significant differences follow, e.g. a hyphen only affects the
// comparison if no other differences follow
string sectionA = strA.Substring(iA, jA - iA);
string sectionB = strB.Substring(iB, jB - iB);
if (cmp.Compare(sectionA + "1", sectionB + "2", options) ==
cmp.Compare(sectionA + "2", sectionB + "1", options))
{
return cmp.Compare(strA, iA, strB, iB, options);
}
else if (softResultWeight < 1) {
softResult = cmpResult;
softResultWeight = 1;
}
}
iA = jA;
iB = jB;
}
else {
char zeroA = (char)(strA[iA] - (int)Char.GetNumericValue(strA[iA]));
char zeroB = (char)(strB[iB] - (int)Char.GetNumericValue(strB[iB]));
int jA = iA;
int jB = iB;
while (jA < strA.Length && strA[jA] == zeroA) jA++;
while (jB < strB.Length && strB[jB] == zeroB) jB++;
int resultIfSameLength = 0;
do {
isDigitA = jA < strA.Length && Char.IsDigit(strA[jA]);
isDigitB = jB < strB.Length && Char.IsDigit(strB[jB]);
int numA = isDigitA ? (int)Char.GetNumericValue(strA[jA]) : 0;
int numB = isDigitB ? (int)Char.GetNumericValue(strB[jB]) : 0;
if (isDigitA && (char)(strA[jA] - numA) != zeroA) isDigitA = false;
if (isDigitB && (char)(strB[jB] - numB) != zeroB) isDigitB = false;
if (isDigitA && isDigitB) {
if (numA != numB && resultIfSameLength == 0) {
resultIfSameLength = numA < numB ? -1 : 1;
}
jA++;
jB++;
}
}
while (isDigitA && isDigitB);
if (isDigitA != isDigitB) {
// One number has more digits than the other (ignoring leading zeros) - the longer
// number must be larger
return isDigitA ? 1 : -1;
}
else if (resultIfSameLength != 0) {
// Both numbers are the same length (ignoring leading zeros) and at least one of
// the digits differed - the first difference determines the result
return resultIfSameLength;
}
int lA = jA - iA;
int lB = jB - iB;
if (lA != lB) {
// Both numbers are equivalent but one has more leading zeros
return lA > lB ? -1 : 1;
}
else if (zeroA != zeroB && softResultWeight < 2) {
softResult = cmp.Compare(strA, iA, 1, strB, iB, 1, options);
softResultWeight = 2;
}
iA = jA;
iB = jB;
}
}
if (iA < strA.Length || iB < strB.Length) {
return iA < strA.Length ? 1 : -1;
}
else if (softResult != 0) {
return softResult;
}
return 0;
}
The signature matches the Comparison<string> delegate:
string[] files = Directory.GetFiles(#"C:\");
Array.Sort(files, CompareNatural);
Here's a wrapper class for use as IComparer<string>:
public class CustomComparer<T> : IComparer<T> {
private Comparison<T> _comparison;
public CustomComparer(Comparison<T> comparison) {
_comparison = comparison;
}
public int Compare(T x, T y) {
return _comparison(x, y);
}
}
Example:
string[] files = Directory.EnumerateFiles(#"C:\")
.OrderBy(f => f, new CustomComparer<string>(CompareNatural))
.ToArray();
Here's a good set of filenames I use for testing:
Func<string, string> expand = (s) => { int o; while ((o = s.IndexOf('\\')) != -1) { int p = o + 1;
int z = 1; while (s[p] == '0') { z++; p++; } int c = Int32.Parse(s.Substring(p, z));
s = s.Substring(0, o) + new string(s[o - 1], c) + s.Substring(p + z); } return s; };
string encodedFileNames =
"KDEqLW4xMiotbjEzKjAwMDFcMDY2KjAwMlwwMTcqMDA5XDAxNyowMlwwMTcqMDlcMDE3KjEhKjEtISox" +
"LWEqMS4yNT8xLjI1KjEuNT8xLjUqMSoxXDAxNyoxXDAxOCoxXDAxOSoxXDA2NioxXDA2NyoxYSoyXDAx" +
"NyoyXDAxOCo5XDAxNyo5XDAxOCo5XDA2Nio9MSphMDAxdGVzdDAxKmEwMDF0ZXN0aW5nYTBcMzEqYTAw" +
"Mj9hMDAyIGE/YTAwMiBhKmEwMDIqYTAwMmE/YTAwMmEqYTAxdGVzdGluZ2EwMDEqYTAxdnNmcyphMSph" +
"MWEqYTF6KmEyKmIwMDAzcTYqYjAwM3E0KmIwM3E1KmMtZSpjZCpjZipmIDEqZipnP2cgMT9oLW4qaG8t" +
"bipJKmljZS1jcmVhbT9pY2VjcmVhbT9pY2VjcmVhbS0/ajBcNDE/ajAwMWE/ajAxP2shKmsnKmstKmsx" +
"KmthKmxpc3QqbTAwMDNhMDA1YSptMDAzYTAwMDVhKm0wMDNhMDA1Km0wMDNhMDA1YSpuMTIqbjEzKm8t" +
"bjAxMypvLW4xMipvLW40P28tbjQhP28tbjR6P28tbjlhLWI1Km8tbjlhYjUqb24wMTMqb24xMipvbjQ/" +
"b240IT9vbjR6P29uOWEtYjUqb245YWI1Km/CrW4wMTMqb8KtbjEyKnAwMCpwMDEqcDAxwr0hKnAwMcK9" +
"KnAwMcK9YSpwMDHCvcK+KnAwMipwMMK9KnEtbjAxMypxLW4xMipxbjAxMypxbjEyKnItMDAhKnItMDAh" +
"NSpyLTAwIe+8lSpyLTAwYSpyLe+8kFwxIS01KnIt77yQXDEhLe+8lSpyLe+8kFwxISpyLe+8kFwxITUq" +
"ci3vvJBcMSHvvJUqci3vvJBcMWEqci3vvJBcMyE1KnIwMCEqcjAwLTUqcjAwLjUqcjAwNSpyMDBhKnIw" +
"NSpyMDYqcjQqcjUqctmg2aYqctmkKnLZpSpy27Dbtipy27Qqctu1KnLfgN+GKnLfhCpy34UqcuClpuCl" +
"rCpy4KWqKnLgpasqcuCnpuCnrCpy4KeqKnLgp6sqcuCppuCprCpy4KmqKnLgqasqcuCrpuCrrCpy4Kuq" +
"KnLgq6sqcuCtpuCtrCpy4K2qKnLgrasqcuCvpuCvrCpy4K+qKnLgr6sqcuCxpuCxrCpy4LGqKnLgsasq" +
"cuCzpuCzrCpy4LOqKnLgs6sqcuC1puC1rCpy4LWqKnLgtasqcuC5kOC5lipy4LmUKnLguZUqcuC7kOC7" +
"lipy4LuUKnLgu5UqcuC8oOC8pipy4LykKnLgvKUqcuGBgOGBhipy4YGEKnLhgYUqcuGCkOGClipy4YKU" +
"KnLhgpUqcuGfoOGfpipy4Z+kKnLhn6UqcuGgkOGglipy4aCUKnLhoJUqcuGlhuGljCpy4aWKKnLhpYsq" +
"cuGnkOGnlipy4aeUKnLhp5UqcuGtkOGtlipy4a2UKnLhrZUqcuGusOGutipy4a60KnLhrrUqcuGxgOGx" +
"hipy4bGEKnLhsYUqcuGxkOGxlipy4bGUKnLhsZUqcuqYoFwx6pilKnLqmKDqmKUqcuqYoOqYpipy6pik" +
"KnLqmKUqcuqjkOqjlipy6qOUKnLqo5UqcuqkgOqkhipy6qSEKnLqpIUqcuqpkOqplipy6qmUKnLqqZUq" +
"cvCQkqAqcvCQkqUqcvCdn5gqcvCdn50qcu+8kFwxISpy77yQXDEt77yVKnLvvJBcMS7vvJUqcu+8kFwx" +
"YSpy77yQXDHqmKUqcu+8kFwx77yO77yVKnLvvJBcMe+8lSpy77yQ77yVKnLvvJDvvJYqcu+8lCpy77yV" +
"KnNpKnPEsSp0ZXN02aIqdGVzdNmi2aAqdGVzdNmjKnVBZS0qdWFlKnViZS0qdUJlKnVjZS0xw6kqdWNl" +
"McOpLSp1Y2Uxw6kqdWPDqS0xZSp1Y8OpMWUtKnVjw6kxZSp3ZWlhMSp3ZWlhMip3ZWlzczEqd2Vpc3My" +
"KndlaXoxKndlaXoyKndlacOfMSp3ZWnDnzIqeSBhMyp5IGE0KnknYTMqeSdhNCp5K2EzKnkrYTQqeS1h" +
"Myp5LWE0KnlhMyp5YTQqej96IDA1MD96IDIxP3ohMjE/ejIwP3oyMj96YTIxP3rCqTIxP1sxKl8xKsKt" +
"bjEyKsKtbjEzKsSwKg==";
string[] fileNames = Encoding.UTF8.GetString(Convert.FromBase64String(encodedFileNames))
.Replace("*", ".txt?").Split(new[] { "?" }, StringSplitOptions.RemoveEmptyEntries)
.Select(n => expand(n)).ToArray();
Matthews Horsleys answer is the fastest method which doesn't change behaviour depending on which version of windows your program is running on. However, it can be even faster by creating the regex once, and using RegexOptions.Compiled. I also added the option of inserting a string comparer so you can ignore case if needed, and improved readability a bit.
public static IEnumerable<T> OrderByNatural<T>(this IEnumerable<T> items, Func<T, string> selector, StringComparer stringComparer = null)
{
var regex = new Regex(#"\d+", RegexOptions.Compiled);
int maxDigits = items
.SelectMany(i => regex.Matches(selector(i)).Cast<Match>().Select(digitChunk => (int?)digitChunk.Value.Length))
.Max() ?? 0;
return items.OrderBy(i => regex.Replace(selector(i), match => match.Value.PadLeft(maxDigits, '0')), stringComparer ?? StringComparer.CurrentCulture);
}
Use by
var sortedEmployees = employees.OrderByNatural(emp => emp.Name);
This takes 450ms to sort 100,000 strings compared to 300ms for the default .net string comparison - pretty fast!
Pure C# solution for linq orderby:
http://zootfroot.blogspot.com/2009/09/natural-sort-compare-with-linq-orderby.html
public class NaturalSortComparer<T> : IComparer<string>, IDisposable
{
private bool isAscending;
public NaturalSortComparer(bool inAscendingOrder = true)
{
this.isAscending = inAscendingOrder;
}
#region IComparer<string> Members
public int Compare(string x, string y)
{
throw new NotImplementedException();
}
#endregion
#region IComparer<string> Members
int IComparer<string>.Compare(string x, string y)
{
if (x == y)
return 0;
string[] x1, y1;
if (!table.TryGetValue(x, out x1))
{
x1 = Regex.Split(x.Replace(" ", ""), "([0-9]+)");
table.Add(x, x1);
}
if (!table.TryGetValue(y, out y1))
{
y1 = Regex.Split(y.Replace(" ", ""), "([0-9]+)");
table.Add(y, y1);
}
int returnVal;
for (int i = 0; i < x1.Length && i < y1.Length; i++)
{
if (x1[i] != y1[i])
{
returnVal = PartCompare(x1[i], y1[i]);
return isAscending ? returnVal : -returnVal;
}
}
if (y1.Length > x1.Length)
{
returnVal = 1;
}
else if (x1.Length > y1.Length)
{
returnVal = -1;
}
else
{
returnVal = 0;
}
return isAscending ? returnVal : -returnVal;
}
private static int PartCompare(string left, string right)
{
int x, y;
if (!int.TryParse(left, out x))
return left.CompareTo(right);
if (!int.TryParse(right, out y))
return left.CompareTo(right);
return x.CompareTo(y);
}
#endregion
private Dictionary<string, string[]> table = new Dictionary<string, string[]>();
public void Dispose()
{
table.Clear();
table = null;
}
}
My solution:
void Main()
{
new[] {"a4","a3","a2","a10","b5","b4","b400","1","C1d","c1d2"}.OrderBy(x => x, new NaturalStringComparer()).Dump();
}
public class NaturalStringComparer : IComparer<string>
{
private static readonly Regex _re = new Regex(#"(?<=\D)(?=\d)|(?<=\d)(?=\D)", RegexOptions.Compiled);
public int Compare(string x, string y)
{
x = x.ToLower();
y = y.ToLower();
if(string.Compare(x, 0, y, 0, Math.Min(x.Length, y.Length)) == 0)
{
if(x.Length == y.Length) return 0;
return x.Length < y.Length ? -1 : 1;
}
var a = _re.Split(x);
var b = _re.Split(y);
int i = 0;
while(true)
{
int r = PartCompare(a[i], b[i]);
if(r != 0) return r;
++i;
}
}
private static int PartCompare(string x, string y)
{
int a, b;
if(int.TryParse(x, out a) && int.TryParse(y, out b))
return a.CompareTo(b);
return x.CompareTo(y);
}
}
Results:
1
a2
a3
a4
a10
b4
b5
b400
C1d
c1d2
You do need to be careful -- I vaguely recall reading that StrCmpLogicalW, or something like it, was not strictly transitive, and I have observed .NET's sort methods to sometimes get stuck in infinite loops if the comparison function breaks that rule.
A transitive comparison will always report that a < c if a < b and b < c. There exists a function that does a natural sort order comparison that does not always meet that criterion, but I can't recall whether it is StrCmpLogicalW or something else.
This is my code to sort a string having both alpha and numeric characters.
First, this extension method:
public static IEnumerable<string> AlphanumericSort(this IEnumerable<string> me)
{
return me.OrderBy(x => Regex.Replace(x, #"\d+", m => m.Value.PadLeft(50, '0')));
}
Then, simply use it anywhere in your code like this:
List<string> test = new List<string>() { "The 1st", "The 12th", "The 2nd" };
test = test.AlphanumericSort();
How does it works ? By replaceing with zeros:
Original | Regex Replace | The | Returned
List | Apply PadLeft | Sorting | List
| | |
"The 1st" | "The 001st" | "The 001st" | "The 1st"
"The 12th" | "The 012th" | "The 002nd" | "The 2nd"
"The 2nd" | "The 002nd" | "The 012th" | "The 12th"
Works with multiples numbers:
Alphabetical Sorting | Alphanumeric Sorting
|
"Page 21, Line 42" | "Page 3, Line 7"
"Page 21, Line 5" | "Page 3, Line 32"
"Page 3, Line 32" | "Page 21, Line 5"
"Page 3, Line 7" | "Page 21, Line 42"
Hope that's will help.
Here's a version for .NET Core 2.1+ / .NET 5.0+, using spans to avoid allocations
public class NaturalSortStringComparer : IComparer<string>
{
public static NaturalSortStringComparer Ordinal { get; } = new NaturalSortStringComparer(StringComparison.Ordinal);
public static NaturalSortStringComparer OrdinalIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.OrdinalIgnoreCase);
public static NaturalSortStringComparer CurrentCulture { get; } = new NaturalSortStringComparer(StringComparison.CurrentCulture);
public static NaturalSortStringComparer CurrentCultureIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.CurrentCultureIgnoreCase);
public static NaturalSortStringComparer InvariantCulture { get; } = new NaturalSortStringComparer(StringComparison.InvariantCulture);
public static NaturalSortStringComparer InvariantCultureIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.InvariantCultureIgnoreCase);
private readonly StringComparison _comparison;
public NaturalSortStringComparer(StringComparison comparison)
{
_comparison = comparison;
}
public int Compare(string x, string y)
{
// Let string.Compare handle the case where x or y is null
if (x is null || y is null)
return string.Compare(x, y, _comparison);
var xSegments = GetSegments(x);
var ySegments = GetSegments(y);
while (xSegments.MoveNext() && ySegments.MoveNext())
{
int cmp;
// If they're both numbers, compare the value
if (xSegments.CurrentIsNumber && ySegments.CurrentIsNumber)
{
var xValue = long.Parse(xSegments.Current);
var yValue = long.Parse(ySegments.Current);
cmp = xValue.CompareTo(yValue);
if (cmp != 0)
return cmp;
}
// If x is a number and y is not, x is "lesser than" y
else if (xSegments.CurrentIsNumber)
{
return -1;
}
// If y is a number and x is not, x is "greater than" y
else if (ySegments.CurrentIsNumber)
{
return 1;
}
// OK, neither are number, compare the segments as text
cmp = xSegments.Current.CompareTo(ySegments.Current, _comparison);
if (cmp != 0)
return cmp;
}
// At this point, either all segments are equal, or one string is shorter than the other
// If x is shorter, it's "lesser than" y
if (x.Length < y.Length)
return -1;
// If x is longer, it's "greater than" y
if (x.Length > y.Length)
return 1;
// If they have the same length, they're equal
return 0;
}
private static StringSegmentEnumerator GetSegments(string s) => new StringSegmentEnumerator(s);
private struct StringSegmentEnumerator
{
private readonly string _s;
private int _start;
private int _length;
public StringSegmentEnumerator(string s)
{
_s = s;
_start = -1;
_length = 0;
CurrentIsNumber = false;
}
public ReadOnlySpan<char> Current => _s.AsSpan(_start, _length);
public bool CurrentIsNumber { get; private set; }
public bool MoveNext()
{
var currentPosition = _start >= 0
? _start + _length
: 0;
if (currentPosition >= _s.Length)
return false;
int start = currentPosition;
bool isFirstCharDigit = Char.IsDigit(_s[currentPosition]);
while (++currentPosition < _s.Length && Char.IsDigit(_s[currentPosition]) == isFirstCharDigit)
{
}
_start = start;
_length = currentPosition - start;
CurrentIsNumber = isFirstCharDigit;
return true;
}
}
}
Adding to Greg Beech's answer (because I've just been searching for that), if you want to use this from Linq you can use the OrderBy that takes an IComparer. E.g.:
var items = new List<MyItem>();
// fill items
var sorted = items.OrderBy(item => item.Name, new NaturalStringComparer());
Here's a relatively simple example that doesn't use P/Invoke and avoids any allocation during execution.
Feel free to use the code from here, or if it's easier there's a NuGet package:
https://www.nuget.org/packages/NaturalSort
https://github.com/drewnoakes/natural-sort
internal sealed class NaturalStringComparer : IComparer<string>
{
public static NaturalStringComparer Instance { get; } = new NaturalStringComparer();
public int Compare(string x, string y)
{
// sort nulls to the start
if (x == null)
return y == null ? 0 : -1;
if (y == null)
return 1;
var ix = 0;
var iy = 0;
while (true)
{
// sort shorter strings to the start
if (ix >= x.Length)
return iy >= y.Length ? 0 : -1;
if (iy >= y.Length)
return 1;
var cx = x[ix];
var cy = y[iy];
int result;
if (char.IsDigit(cx) && char.IsDigit(cy))
result = CompareInteger(x, y, ref ix, ref iy);
else
result = cx.CompareTo(y[iy]);
if (result != 0)
return result;
ix++;
iy++;
}
}
private static int CompareInteger(string x, string y, ref int ix, ref int iy)
{
var lx = GetNumLength(x, ix);
var ly = GetNumLength(y, iy);
// shorter number first (note, doesn't handle leading zeroes)
if (lx != ly)
return lx.CompareTo(ly);
for (var i = 0; i < lx; i++)
{
var result = x[ix++].CompareTo(y[iy++]);
if (result != 0)
return result;
}
return 0;
}
private static int GetNumLength(string s, int i)
{
var length = 0;
while (i < s.Length && char.IsDigit(s[i++]))
length++;
return length;
}
}
It doesn't ignore leading zeroes, so 01 comes after 2.
Corresponding unit test:
public class NumericStringComparerTests
{
[Fact]
public void OrdersCorrectly()
{
AssertEqual("", "");
AssertEqual(null, null);
AssertEqual("Hello", "Hello");
AssertEqual("Hello123", "Hello123");
AssertEqual("123", "123");
AssertEqual("123Hello", "123Hello");
AssertOrdered("", "Hello");
AssertOrdered(null, "Hello");
AssertOrdered("Hello", "Hello1");
AssertOrdered("Hello123", "Hello124");
AssertOrdered("Hello123", "Hello133");
AssertOrdered("Hello123", "Hello223");
AssertOrdered("123", "124");
AssertOrdered("123", "133");
AssertOrdered("123", "223");
AssertOrdered("123", "1234");
AssertOrdered("123", "2345");
AssertOrdered("0", "1");
AssertOrdered("123Hello", "124Hello");
AssertOrdered("123Hello", "133Hello");
AssertOrdered("123Hello", "223Hello");
AssertOrdered("123Hello", "1234Hello");
}
private static void AssertEqual(string x, string y)
{
Assert.Equal(0, NaturalStringComparer.Instance.Compare(x, y));
Assert.Equal(0, NaturalStringComparer.Instance.Compare(y, x));
}
private static void AssertOrdered(string x, string y)
{
Assert.Equal(-1, NaturalStringComparer.Instance.Compare(x, y));
Assert.Equal( 1, NaturalStringComparer.Instance.Compare(y, x));
}
}
I've actually implemented it as an extension method on the StringComparer so that you could do for example:
StringComparer.CurrentCulture.WithNaturalSort() or
StringComparer.OrdinalIgnoreCase.WithNaturalSort().
The resulting IComparer<string> can be used in all places like OrderBy, OrderByDescending, ThenBy, ThenByDescending, SortedSet<string>, etc. And you can still easily tweak case sensitivity, culture, etc.
The implementation is fairly trivial and it should perform quite well even on large sequences.
I've also published it as a tiny NuGet package, so you can just do:
Install-Package NaturalSort.Extension
The code including XML documentation comments and suite of tests is available in the NaturalSort.Extension GitHub repository.
The entire code is this (if you cannot use C# 7 yet, just install the NuGet package):
public static class StringComparerNaturalSortExtension
{
public static IComparer<string> WithNaturalSort(this StringComparer stringComparer) => new NaturalSortComparer(stringComparer);
private class NaturalSortComparer : IComparer<string>
{
public NaturalSortComparer(StringComparer stringComparer)
{
_stringComparer = stringComparer;
}
private readonly StringComparer _stringComparer;
private static readonly Regex NumberSequenceRegex = new Regex(#"(\d+)", RegexOptions.Compiled | RegexOptions.CultureInvariant);
private static string[] Tokenize(string s) => s == null ? new string[] { } : NumberSequenceRegex.Split(s);
private static ulong ParseNumberOrZero(string s) => ulong.TryParse(s, NumberStyles.None, CultureInfo.InvariantCulture, out var result) ? result : 0;
public int Compare(string s1, string s2)
{
var tokens1 = Tokenize(s1);
var tokens2 = Tokenize(s2);
var zipCompare = tokens1.Zip(tokens2, TokenCompare).FirstOrDefault(x => x != 0);
if (zipCompare != 0)
return zipCompare;
var lengthCompare = tokens1.Length.CompareTo(tokens2.Length);
return lengthCompare;
}
private int TokenCompare(string token1, string token2)
{
var number1 = ParseNumberOrZero(token1);
var number2 = ParseNumberOrZero(token2);
var numberCompare = number1.CompareTo(number2);
if (numberCompare != 0)
return numberCompare;
var stringCompare = _stringComparer.Compare(token1, token2);
return stringCompare;
}
}
}
Inspired by Michael Parker's solution, here is an IComparer implementation that you can drop in to any of the linq ordering methods:
private class NaturalStringComparer : IComparer<string>
{
public int Compare(string left, string right)
{
int max = new[] { left, right }
.SelectMany(x => Regex.Matches(x, #"\d+").Cast<Match>().Select(y => (int?)y.Value.Length))
.Max() ?? 0;
var leftPadded = Regex.Replace(left, #"\d+", m => m.Value.PadLeft(max, '0'));
var rightPadded = Regex.Replace(right, #"\d+", m => m.Value.PadLeft(max, '0'));
return string.Compare(leftPadded, rightPadded);
}
}
Here is a naive one-line regex-less LINQ way (borrowed from python):
var alphaStrings = new List<string>() { "10","2","3","4","50","11","100","a12","b12" };
var orderedString = alphaStrings.OrderBy(g => new Tuple<int, string>(g.ToCharArray().All(char.IsDigit)? int.Parse(g) : int.MaxValue, g));
// Order Now: ["2","3","4","10","11","50","100","a12","b12"]
Expanding on a couple of the previous answers and making use of extension methods, I came up with the following that doesn't have the caveats of potential multiple enumerable enumeration, or performance issues concerned with using multiple regex objects, or calling regex needlessly, that being said, it does use ToList(), which can negate the benefits in larger collections.
The selector supports generic typing to allow any delegate to be assigned, the elements in the source collection are mutated by the selector, then converted to strings with ToString().
private static readonly Regex _NaturalOrderExpr = new Regex(#"\d+", RegexOptions.Compiled);
public static IEnumerable<TSource> OrderByNatural<TSource, TKey>(
this IEnumerable<TSource> source, Func<TSource, TKey> selector)
{
int max = 0;
var selection = source.Select(
o =>
{
var v = selector(o);
var s = v != null ? v.ToString() : String.Empty;
if (!String.IsNullOrWhiteSpace(s))
{
var mc = _NaturalOrderExpr.Matches(s);
if (mc.Count > 0)
{
max = Math.Max(max, mc.Cast<Match>().Max(m => m.Value.Length));
}
}
return new
{
Key = o,
Value = s
};
}).ToList();
return
selection.OrderBy(
o =>
String.IsNullOrWhiteSpace(o.Value) ? o.Value : _NaturalOrderExpr.Replace(o.Value, m => m.Value.PadLeft(max, '0')))
.Select(o => o.Key);
}
public static IEnumerable<TSource> OrderByDescendingNatural<TSource, TKey>(
this IEnumerable<TSource> source, Func<TSource, TKey> selector)
{
int max = 0;
var selection = source.Select(
o =>
{
var v = selector(o);
var s = v != null ? v.ToString() : String.Empty;
if (!String.IsNullOrWhiteSpace(s))
{
var mc = _NaturalOrderExpr.Matches(s);
if (mc.Count > 0)
{
max = Math.Max(max, mc.Cast<Match>().Max(m => m.Value.Length));
}
}
return new
{
Key = o,
Value = s
};
}).ToList();
return
selection.OrderByDescending(
o =>
String.IsNullOrWhiteSpace(o.Value) ? o.Value : _NaturalOrderExpr.Replace(o.Value, m => m.Value.PadLeft(max, '0')))
.Select(o => o.Key);
}
A version that's easier to read/maintain.
public class NaturalStringComparer : IComparer<string>
{
public static NaturalStringComparer Instance { get; } = new NaturalStringComparer();
public int Compare(string x, string y) {
const int LeftIsSmaller = -1;
const int RightIsSmaller = 1;
const int Equal = 0;
var leftString = x;
var rightString = y;
var stringComparer = CultureInfo.CurrentCulture.CompareInfo;
int rightIndex;
int leftIndex;
for (leftIndex = 0, rightIndex = 0;
leftIndex < leftString.Length && rightIndex < rightString.Length;
leftIndex++, rightIndex++) {
var leftChar = leftString[leftIndex];
var rightChar = rightString[leftIndex];
var leftIsNumber = char.IsNumber(leftChar);
var rightIsNumber = char.IsNumber(rightChar);
if (!leftIsNumber && !rightIsNumber) {
var result = stringComparer.Compare(leftString, leftIndex, 1, rightString, leftIndex, 1);
if (result != 0) return result;
} else if (leftIsNumber && !rightIsNumber) {
return LeftIsSmaller;
} else if (!leftIsNumber && rightIsNumber) {
return RightIsSmaller;
} else {
var leftNumberLength = NumberLength(leftString, leftIndex, out var leftNumber);
var rightNumberLength = NumberLength(rightString, rightIndex, out var rightNumber);
if (leftNumberLength < rightNumberLength) {
return LeftIsSmaller;
} else if (leftNumberLength > rightNumberLength) {
return RightIsSmaller;
} else {
if(leftNumber < rightNumber) {
return LeftIsSmaller;
} else if(leftNumber > rightNumber) {
return RightIsSmaller;
}
}
}
}
if (leftString.Length < rightString.Length) {
return LeftIsSmaller;
} else if(leftString.Length > rightString.Length) {
return RightIsSmaller;
}
return Equal;
}
public int NumberLength(string str, int offset, out int number) {
if (string.IsNullOrWhiteSpace(str)) throw new ArgumentNullException(nameof(str));
if (offset >= str.Length) throw new ArgumentOutOfRangeException(nameof(offset), offset, "Offset must be less than the length of the string.");
var currentOffset = offset;
var curChar = str[currentOffset];
if (!char.IsNumber(curChar))
throw new ArgumentException($"'{curChar}' is not a number.", nameof(offset));
int length = 1;
var numberString = string.Empty;
for (currentOffset = offset + 1;
currentOffset < str.Length;
currentOffset++, length++) {
curChar = str[currentOffset];
numberString += curChar;
if (!char.IsNumber(curChar)) {
number = int.Parse(numberString);
return length;
}
}
number = int.Parse(numberString);
return length;
}
}
We had a need for a natural sort to deal with text with the following pattern:
"Test 1-1-1 something"
"Test 1-2-3 something"
...
For some reason when I first looked on SO, I didn't find this post and implemented our own. Compared to some of the solutions presented here, while similar in concept, it could have the benefit of maybe being simpler and easier to understand. However, while I did try to look at performance bottlenecks, It is still a much slower implementation than the default OrderBy().
Here is the extension method I implement:
public static class EnumerableExtensions
{
// set up the regex parser once and for all
private static readonly Regex Regex = new Regex(#"\d+|\D+", RegexOptions.Compiled | RegexOptions.Singleline);
// stateless comparer can be built once
private static readonly AggregateComparer Comparer = new AggregateComparer();
public static IEnumerable<T> OrderByNatural<T>(this IEnumerable<T> source, Func<T, string> selector)
{
// first extract string from object using selector
// then extract digit and non-digit groups
Func<T, IEnumerable<IComparable>> splitter =
s => Regex.Matches(selector(s))
.Cast<Match>()
.Select(m => Char.IsDigit(m.Value[0]) ? (IComparable) int.Parse(m.Value) : m.Value);
return source.OrderBy(splitter, Comparer);
}
/// <summary>
/// This comparer will compare two lists of objects against each other
/// </summary>
/// <remarks>Objects in each list are compare to their corresponding elements in the other
/// list until a difference is found.</remarks>
private class AggregateComparer : IComparer<IEnumerable<IComparable>>
{
public int Compare(IEnumerable<IComparable> x, IEnumerable<IComparable> y)
{
return
x.Zip(y, (a, b) => new {a, b}) // walk both lists
.Select(pair => pair.a.CompareTo(pair.b)) // compare each object
.FirstOrDefault(result => result != 0); // until a difference is found
}
}
}
The idea is to split the original strings into blocks of digits and non-digits ("\d+|\D+"). Since this is a potentially expensive task, it is done only once per entry. We then use a comparer of comparable objects (sorry, I can't find a more proper way to say it). It compares each block to its corresponding block in the other string.
I would like feedback on how this could be improved and what the major flaws are. Note that maintainability is important to us at this point and we are not currently using this in extremely large data sets.
Let me explain my problem and how i was able to solve it.
Problem:- Sort files based on FileName from FileInfo objects which are retrieved from a Directory.
Solution:- I selected the file names from FileInfo and trimed the ".png" part of the file name. Now, just do List.Sort(), which sorts the filenames in Natural sorting order. Based on my testing i found that having .png messes up sorting order. Have a look at the below code
var imageNameList = new DirectoryInfo(#"C:\Temp\Images").GetFiles("*.png").Select(x =>x.Name.Substring(0, x.Name.Length - 4)).ToList();
imageNameList.Sort();
Anyone have a good resource or provide a sample of a natural order sort in C# for an FileInfo array? I am implementing the IComparer interface in my sorts.
The easiest thing to do is just P/Invoke the built-in function in Windows, and use it as the comparison function in your IComparer:
[DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
private static extern int StrCmpLogicalW(string psz1, string psz2);
Michael Kaplan has some examples of how this function works here, and the changes that were made for Vista to make it work more intuitively. The plus side of this function is that it will have the same behaviour as the version of Windows it runs on, however this does mean that it differs between versions of Windows so you need to consider whether this is a problem for you.
So a complete implementation would be something like:
[SuppressUnmanagedCodeSecurity]
internal static class SafeNativeMethods
{
[DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
public static extern int StrCmpLogicalW(string psz1, string psz2);
}
public sealed class NaturalStringComparer : IComparer<string>
{
public int Compare(string a, string b)
{
return SafeNativeMethods.StrCmpLogicalW(a, b);
}
}
public sealed class NaturalFileInfoNameComparer : IComparer<FileInfo>
{
public int Compare(FileInfo a, FileInfo b)
{
return SafeNativeMethods.StrCmpLogicalW(a.Name, b.Name);
}
}
Just thought I'd add to this (with the most concise solution I could find):
public static IOrderedEnumerable<T> OrderByAlphaNumeric<T>(this IEnumerable<T> source, Func<T, string> selector)
{
int max = source
.SelectMany(i => Regex.Matches(selector(i), #"\d+").Cast<Match>().Select(m => (int?)m.Value.Length))
.Max() ?? 0;
return source.OrderBy(i => Regex.Replace(selector(i), #"\d+", m => m.Value.PadLeft(max, '0')));
}
The above pads any numbers in the string to the max length of all numbers in all strings and uses the resulting string to sort.
The cast to (int?) is to allow for collections of strings without any numbers (.Max() on an empty enumerable throws an InvalidOperationException).
None of the existing implementations looked great so I wrote my own. The results are almost identical to the sorting used by modern versions of Windows Explorer (Windows 7/8). The only differences I've seen are 1) although Windows used to (e.g. XP) handle numbers of any length, it's now limited to 19 digits - mine is unlimited, 2) Windows gives inconsistent results with certain sets of Unicode digits - mine works fine (although it doesn't numerically compare digits from surrogate pairs; nor does Windows), and 3) mine can't distinguish different types of non-primary sort weights if they occur in different sections (e.g. "e-1é" vs "é1e-" - the sections before and after the number have diacritic and punctuation weight differences).
public static int CompareNatural(string strA, string strB) {
return CompareNatural(strA, strB, CultureInfo.CurrentCulture, CompareOptions.IgnoreCase);
}
public static int CompareNatural(string strA, string strB, CultureInfo culture, CompareOptions options) {
CompareInfo cmp = culture.CompareInfo;
int iA = 0;
int iB = 0;
int softResult = 0;
int softResultWeight = 0;
while (iA < strA.Length && iB < strB.Length) {
bool isDigitA = Char.IsDigit(strA[iA]);
bool isDigitB = Char.IsDigit(strB[iB]);
if (isDigitA != isDigitB) {
return cmp.Compare(strA, iA, strB, iB, options);
}
else if (!isDigitA && !isDigitB) {
int jA = iA + 1;
int jB = iB + 1;
while (jA < strA.Length && !Char.IsDigit(strA[jA])) jA++;
while (jB < strB.Length && !Char.IsDigit(strB[jB])) jB++;
int cmpResult = cmp.Compare(strA, iA, jA - iA, strB, iB, jB - iB, options);
if (cmpResult != 0) {
// Certain strings may be considered different due to "soft" differences that are
// ignored if more significant differences follow, e.g. a hyphen only affects the
// comparison if no other differences follow
string sectionA = strA.Substring(iA, jA - iA);
string sectionB = strB.Substring(iB, jB - iB);
if (cmp.Compare(sectionA + "1", sectionB + "2", options) ==
cmp.Compare(sectionA + "2", sectionB + "1", options))
{
return cmp.Compare(strA, iA, strB, iB, options);
}
else if (softResultWeight < 1) {
softResult = cmpResult;
softResultWeight = 1;
}
}
iA = jA;
iB = jB;
}
else {
char zeroA = (char)(strA[iA] - (int)Char.GetNumericValue(strA[iA]));
char zeroB = (char)(strB[iB] - (int)Char.GetNumericValue(strB[iB]));
int jA = iA;
int jB = iB;
while (jA < strA.Length && strA[jA] == zeroA) jA++;
while (jB < strB.Length && strB[jB] == zeroB) jB++;
int resultIfSameLength = 0;
do {
isDigitA = jA < strA.Length && Char.IsDigit(strA[jA]);
isDigitB = jB < strB.Length && Char.IsDigit(strB[jB]);
int numA = isDigitA ? (int)Char.GetNumericValue(strA[jA]) : 0;
int numB = isDigitB ? (int)Char.GetNumericValue(strB[jB]) : 0;
if (isDigitA && (char)(strA[jA] - numA) != zeroA) isDigitA = false;
if (isDigitB && (char)(strB[jB] - numB) != zeroB) isDigitB = false;
if (isDigitA && isDigitB) {
if (numA != numB && resultIfSameLength == 0) {
resultIfSameLength = numA < numB ? -1 : 1;
}
jA++;
jB++;
}
}
while (isDigitA && isDigitB);
if (isDigitA != isDigitB) {
// One number has more digits than the other (ignoring leading zeros) - the longer
// number must be larger
return isDigitA ? 1 : -1;
}
else if (resultIfSameLength != 0) {
// Both numbers are the same length (ignoring leading zeros) and at least one of
// the digits differed - the first difference determines the result
return resultIfSameLength;
}
int lA = jA - iA;
int lB = jB - iB;
if (lA != lB) {
// Both numbers are equivalent but one has more leading zeros
return lA > lB ? -1 : 1;
}
else if (zeroA != zeroB && softResultWeight < 2) {
softResult = cmp.Compare(strA, iA, 1, strB, iB, 1, options);
softResultWeight = 2;
}
iA = jA;
iB = jB;
}
}
if (iA < strA.Length || iB < strB.Length) {
return iA < strA.Length ? 1 : -1;
}
else if (softResult != 0) {
return softResult;
}
return 0;
}
The signature matches the Comparison<string> delegate:
string[] files = Directory.GetFiles(#"C:\");
Array.Sort(files, CompareNatural);
Here's a wrapper class for use as IComparer<string>:
public class CustomComparer<T> : IComparer<T> {
private Comparison<T> _comparison;
public CustomComparer(Comparison<T> comparison) {
_comparison = comparison;
}
public int Compare(T x, T y) {
return _comparison(x, y);
}
}
Example:
string[] files = Directory.EnumerateFiles(#"C:\")
.OrderBy(f => f, new CustomComparer<string>(CompareNatural))
.ToArray();
Here's a good set of filenames I use for testing:
Func<string, string> expand = (s) => { int o; while ((o = s.IndexOf('\\')) != -1) { int p = o + 1;
int z = 1; while (s[p] == '0') { z++; p++; } int c = Int32.Parse(s.Substring(p, z));
s = s.Substring(0, o) + new string(s[o - 1], c) + s.Substring(p + z); } return s; };
string encodedFileNames =
"KDEqLW4xMiotbjEzKjAwMDFcMDY2KjAwMlwwMTcqMDA5XDAxNyowMlwwMTcqMDlcMDE3KjEhKjEtISox" +
"LWEqMS4yNT8xLjI1KjEuNT8xLjUqMSoxXDAxNyoxXDAxOCoxXDAxOSoxXDA2NioxXDA2NyoxYSoyXDAx" +
"NyoyXDAxOCo5XDAxNyo5XDAxOCo5XDA2Nio9MSphMDAxdGVzdDAxKmEwMDF0ZXN0aW5nYTBcMzEqYTAw" +
"Mj9hMDAyIGE/YTAwMiBhKmEwMDIqYTAwMmE/YTAwMmEqYTAxdGVzdGluZ2EwMDEqYTAxdnNmcyphMSph" +
"MWEqYTF6KmEyKmIwMDAzcTYqYjAwM3E0KmIwM3E1KmMtZSpjZCpjZipmIDEqZipnP2cgMT9oLW4qaG8t" +
"bipJKmljZS1jcmVhbT9pY2VjcmVhbT9pY2VjcmVhbS0/ajBcNDE/ajAwMWE/ajAxP2shKmsnKmstKmsx" +
"KmthKmxpc3QqbTAwMDNhMDA1YSptMDAzYTAwMDVhKm0wMDNhMDA1Km0wMDNhMDA1YSpuMTIqbjEzKm8t" +
"bjAxMypvLW4xMipvLW40P28tbjQhP28tbjR6P28tbjlhLWI1Km8tbjlhYjUqb24wMTMqb24xMipvbjQ/" +
"b240IT9vbjR6P29uOWEtYjUqb245YWI1Km/CrW4wMTMqb8KtbjEyKnAwMCpwMDEqcDAxwr0hKnAwMcK9" +
"KnAwMcK9YSpwMDHCvcK+KnAwMipwMMK9KnEtbjAxMypxLW4xMipxbjAxMypxbjEyKnItMDAhKnItMDAh" +
"NSpyLTAwIe+8lSpyLTAwYSpyLe+8kFwxIS01KnIt77yQXDEhLe+8lSpyLe+8kFwxISpyLe+8kFwxITUq" +
"ci3vvJBcMSHvvJUqci3vvJBcMWEqci3vvJBcMyE1KnIwMCEqcjAwLTUqcjAwLjUqcjAwNSpyMDBhKnIw" +
"NSpyMDYqcjQqcjUqctmg2aYqctmkKnLZpSpy27Dbtipy27Qqctu1KnLfgN+GKnLfhCpy34UqcuClpuCl" +
"rCpy4KWqKnLgpasqcuCnpuCnrCpy4KeqKnLgp6sqcuCppuCprCpy4KmqKnLgqasqcuCrpuCrrCpy4Kuq" +
"KnLgq6sqcuCtpuCtrCpy4K2qKnLgrasqcuCvpuCvrCpy4K+qKnLgr6sqcuCxpuCxrCpy4LGqKnLgsasq" +
"cuCzpuCzrCpy4LOqKnLgs6sqcuC1puC1rCpy4LWqKnLgtasqcuC5kOC5lipy4LmUKnLguZUqcuC7kOC7" +
"lipy4LuUKnLgu5UqcuC8oOC8pipy4LykKnLgvKUqcuGBgOGBhipy4YGEKnLhgYUqcuGCkOGClipy4YKU" +
"KnLhgpUqcuGfoOGfpipy4Z+kKnLhn6UqcuGgkOGglipy4aCUKnLhoJUqcuGlhuGljCpy4aWKKnLhpYsq" +
"cuGnkOGnlipy4aeUKnLhp5UqcuGtkOGtlipy4a2UKnLhrZUqcuGusOGutipy4a60KnLhrrUqcuGxgOGx" +
"hipy4bGEKnLhsYUqcuGxkOGxlipy4bGUKnLhsZUqcuqYoFwx6pilKnLqmKDqmKUqcuqYoOqYpipy6pik" +
"KnLqmKUqcuqjkOqjlipy6qOUKnLqo5UqcuqkgOqkhipy6qSEKnLqpIUqcuqpkOqplipy6qmUKnLqqZUq" +
"cvCQkqAqcvCQkqUqcvCdn5gqcvCdn50qcu+8kFwxISpy77yQXDEt77yVKnLvvJBcMS7vvJUqcu+8kFwx" +
"YSpy77yQXDHqmKUqcu+8kFwx77yO77yVKnLvvJBcMe+8lSpy77yQ77yVKnLvvJDvvJYqcu+8lCpy77yV" +
"KnNpKnPEsSp0ZXN02aIqdGVzdNmi2aAqdGVzdNmjKnVBZS0qdWFlKnViZS0qdUJlKnVjZS0xw6kqdWNl" +
"McOpLSp1Y2Uxw6kqdWPDqS0xZSp1Y8OpMWUtKnVjw6kxZSp3ZWlhMSp3ZWlhMip3ZWlzczEqd2Vpc3My" +
"KndlaXoxKndlaXoyKndlacOfMSp3ZWnDnzIqeSBhMyp5IGE0KnknYTMqeSdhNCp5K2EzKnkrYTQqeS1h" +
"Myp5LWE0KnlhMyp5YTQqej96IDA1MD96IDIxP3ohMjE/ejIwP3oyMj96YTIxP3rCqTIxP1sxKl8xKsKt" +
"bjEyKsKtbjEzKsSwKg==";
string[] fileNames = Encoding.UTF8.GetString(Convert.FromBase64String(encodedFileNames))
.Replace("*", ".txt?").Split(new[] { "?" }, StringSplitOptions.RemoveEmptyEntries)
.Select(n => expand(n)).ToArray();
Matthews Horsleys answer is the fastest method which doesn't change behaviour depending on which version of windows your program is running on. However, it can be even faster by creating the regex once, and using RegexOptions.Compiled. I also added the option of inserting a string comparer so you can ignore case if needed, and improved readability a bit.
public static IEnumerable<T> OrderByNatural<T>(this IEnumerable<T> items, Func<T, string> selector, StringComparer stringComparer = null)
{
var regex = new Regex(#"\d+", RegexOptions.Compiled);
int maxDigits = items
.SelectMany(i => regex.Matches(selector(i)).Cast<Match>().Select(digitChunk => (int?)digitChunk.Value.Length))
.Max() ?? 0;
return items.OrderBy(i => regex.Replace(selector(i), match => match.Value.PadLeft(maxDigits, '0')), stringComparer ?? StringComparer.CurrentCulture);
}
Use by
var sortedEmployees = employees.OrderByNatural(emp => emp.Name);
This takes 450ms to sort 100,000 strings compared to 300ms for the default .net string comparison - pretty fast!
Pure C# solution for linq orderby:
http://zootfroot.blogspot.com/2009/09/natural-sort-compare-with-linq-orderby.html
public class NaturalSortComparer<T> : IComparer<string>, IDisposable
{
private bool isAscending;
public NaturalSortComparer(bool inAscendingOrder = true)
{
this.isAscending = inAscendingOrder;
}
#region IComparer<string> Members
public int Compare(string x, string y)
{
throw new NotImplementedException();
}
#endregion
#region IComparer<string> Members
int IComparer<string>.Compare(string x, string y)
{
if (x == y)
return 0;
string[] x1, y1;
if (!table.TryGetValue(x, out x1))
{
x1 = Regex.Split(x.Replace(" ", ""), "([0-9]+)");
table.Add(x, x1);
}
if (!table.TryGetValue(y, out y1))
{
y1 = Regex.Split(y.Replace(" ", ""), "([0-9]+)");
table.Add(y, y1);
}
int returnVal;
for (int i = 0; i < x1.Length && i < y1.Length; i++)
{
if (x1[i] != y1[i])
{
returnVal = PartCompare(x1[i], y1[i]);
return isAscending ? returnVal : -returnVal;
}
}
if (y1.Length > x1.Length)
{
returnVal = 1;
}
else if (x1.Length > y1.Length)
{
returnVal = -1;
}
else
{
returnVal = 0;
}
return isAscending ? returnVal : -returnVal;
}
private static int PartCompare(string left, string right)
{
int x, y;
if (!int.TryParse(left, out x))
return left.CompareTo(right);
if (!int.TryParse(right, out y))
return left.CompareTo(right);
return x.CompareTo(y);
}
#endregion
private Dictionary<string, string[]> table = new Dictionary<string, string[]>();
public void Dispose()
{
table.Clear();
table = null;
}
}
My solution:
void Main()
{
new[] {"a4","a3","a2","a10","b5","b4","b400","1","C1d","c1d2"}.OrderBy(x => x, new NaturalStringComparer()).Dump();
}
public class NaturalStringComparer : IComparer<string>
{
private static readonly Regex _re = new Regex(#"(?<=\D)(?=\d)|(?<=\d)(?=\D)", RegexOptions.Compiled);
public int Compare(string x, string y)
{
x = x.ToLower();
y = y.ToLower();
if(string.Compare(x, 0, y, 0, Math.Min(x.Length, y.Length)) == 0)
{
if(x.Length == y.Length) return 0;
return x.Length < y.Length ? -1 : 1;
}
var a = _re.Split(x);
var b = _re.Split(y);
int i = 0;
while(true)
{
int r = PartCompare(a[i], b[i]);
if(r != 0) return r;
++i;
}
}
private static int PartCompare(string x, string y)
{
int a, b;
if(int.TryParse(x, out a) && int.TryParse(y, out b))
return a.CompareTo(b);
return x.CompareTo(y);
}
}
Results:
1
a2
a3
a4
a10
b4
b5
b400
C1d
c1d2
You do need to be careful -- I vaguely recall reading that StrCmpLogicalW, or something like it, was not strictly transitive, and I have observed .NET's sort methods to sometimes get stuck in infinite loops if the comparison function breaks that rule.
A transitive comparison will always report that a < c if a < b and b < c. There exists a function that does a natural sort order comparison that does not always meet that criterion, but I can't recall whether it is StrCmpLogicalW or something else.
This is my code to sort a string having both alpha and numeric characters.
First, this extension method:
public static IEnumerable<string> AlphanumericSort(this IEnumerable<string> me)
{
return me.OrderBy(x => Regex.Replace(x, #"\d+", m => m.Value.PadLeft(50, '0')));
}
Then, simply use it anywhere in your code like this:
List<string> test = new List<string>() { "The 1st", "The 12th", "The 2nd" };
test = test.AlphanumericSort();
How does it works ? By replaceing with zeros:
Original | Regex Replace | The | Returned
List | Apply PadLeft | Sorting | List
| | |
"The 1st" | "The 001st" | "The 001st" | "The 1st"
"The 12th" | "The 012th" | "The 002nd" | "The 2nd"
"The 2nd" | "The 002nd" | "The 012th" | "The 12th"
Works with multiples numbers:
Alphabetical Sorting | Alphanumeric Sorting
|
"Page 21, Line 42" | "Page 3, Line 7"
"Page 21, Line 5" | "Page 3, Line 32"
"Page 3, Line 32" | "Page 21, Line 5"
"Page 3, Line 7" | "Page 21, Line 42"
Hope that's will help.
Here's a version for .NET Core 2.1+ / .NET 5.0+, using spans to avoid allocations
public class NaturalSortStringComparer : IComparer<string>
{
public static NaturalSortStringComparer Ordinal { get; } = new NaturalSortStringComparer(StringComparison.Ordinal);
public static NaturalSortStringComparer OrdinalIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.OrdinalIgnoreCase);
public static NaturalSortStringComparer CurrentCulture { get; } = new NaturalSortStringComparer(StringComparison.CurrentCulture);
public static NaturalSortStringComparer CurrentCultureIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.CurrentCultureIgnoreCase);
public static NaturalSortStringComparer InvariantCulture { get; } = new NaturalSortStringComparer(StringComparison.InvariantCulture);
public static NaturalSortStringComparer InvariantCultureIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.InvariantCultureIgnoreCase);
private readonly StringComparison _comparison;
public NaturalSortStringComparer(StringComparison comparison)
{
_comparison = comparison;
}
public int Compare(string x, string y)
{
// Let string.Compare handle the case where x or y is null
if (x is null || y is null)
return string.Compare(x, y, _comparison);
var xSegments = GetSegments(x);
var ySegments = GetSegments(y);
while (xSegments.MoveNext() && ySegments.MoveNext())
{
int cmp;
// If they're both numbers, compare the value
if (xSegments.CurrentIsNumber && ySegments.CurrentIsNumber)
{
var xValue = long.Parse(xSegments.Current);
var yValue = long.Parse(ySegments.Current);
cmp = xValue.CompareTo(yValue);
if (cmp != 0)
return cmp;
}
// If x is a number and y is not, x is "lesser than" y
else if (xSegments.CurrentIsNumber)
{
return -1;
}
// If y is a number and x is not, x is "greater than" y
else if (ySegments.CurrentIsNumber)
{
return 1;
}
// OK, neither are number, compare the segments as text
cmp = xSegments.Current.CompareTo(ySegments.Current, _comparison);
if (cmp != 0)
return cmp;
}
// At this point, either all segments are equal, or one string is shorter than the other
// If x is shorter, it's "lesser than" y
if (x.Length < y.Length)
return -1;
// If x is longer, it's "greater than" y
if (x.Length > y.Length)
return 1;
// If they have the same length, they're equal
return 0;
}
private static StringSegmentEnumerator GetSegments(string s) => new StringSegmentEnumerator(s);
private struct StringSegmentEnumerator
{
private readonly string _s;
private int _start;
private int _length;
public StringSegmentEnumerator(string s)
{
_s = s;
_start = -1;
_length = 0;
CurrentIsNumber = false;
}
public ReadOnlySpan<char> Current => _s.AsSpan(_start, _length);
public bool CurrentIsNumber { get; private set; }
public bool MoveNext()
{
var currentPosition = _start >= 0
? _start + _length
: 0;
if (currentPosition >= _s.Length)
return false;
int start = currentPosition;
bool isFirstCharDigit = Char.IsDigit(_s[currentPosition]);
while (++currentPosition < _s.Length && Char.IsDigit(_s[currentPosition]) == isFirstCharDigit)
{
}
_start = start;
_length = currentPosition - start;
CurrentIsNumber = isFirstCharDigit;
return true;
}
}
}
Adding to Greg Beech's answer (because I've just been searching for that), if you want to use this from Linq you can use the OrderBy that takes an IComparer. E.g.:
var items = new List<MyItem>();
// fill items
var sorted = items.OrderBy(item => item.Name, new NaturalStringComparer());
Here's a relatively simple example that doesn't use P/Invoke and avoids any allocation during execution.
Feel free to use the code from here, or if it's easier there's a NuGet package:
https://www.nuget.org/packages/NaturalSort
https://github.com/drewnoakes/natural-sort
internal sealed class NaturalStringComparer : IComparer<string>
{
public static NaturalStringComparer Instance { get; } = new NaturalStringComparer();
public int Compare(string x, string y)
{
// sort nulls to the start
if (x == null)
return y == null ? 0 : -1;
if (y == null)
return 1;
var ix = 0;
var iy = 0;
while (true)
{
// sort shorter strings to the start
if (ix >= x.Length)
return iy >= y.Length ? 0 : -1;
if (iy >= y.Length)
return 1;
var cx = x[ix];
var cy = y[iy];
int result;
if (char.IsDigit(cx) && char.IsDigit(cy))
result = CompareInteger(x, y, ref ix, ref iy);
else
result = cx.CompareTo(y[iy]);
if (result != 0)
return result;
ix++;
iy++;
}
}
private static int CompareInteger(string x, string y, ref int ix, ref int iy)
{
var lx = GetNumLength(x, ix);
var ly = GetNumLength(y, iy);
// shorter number first (note, doesn't handle leading zeroes)
if (lx != ly)
return lx.CompareTo(ly);
for (var i = 0; i < lx; i++)
{
var result = x[ix++].CompareTo(y[iy++]);
if (result != 0)
return result;
}
return 0;
}
private static int GetNumLength(string s, int i)
{
var length = 0;
while (i < s.Length && char.IsDigit(s[i++]))
length++;
return length;
}
}
It doesn't ignore leading zeroes, so 01 comes after 2.
Corresponding unit test:
public class NumericStringComparerTests
{
[Fact]
public void OrdersCorrectly()
{
AssertEqual("", "");
AssertEqual(null, null);
AssertEqual("Hello", "Hello");
AssertEqual("Hello123", "Hello123");
AssertEqual("123", "123");
AssertEqual("123Hello", "123Hello");
AssertOrdered("", "Hello");
AssertOrdered(null, "Hello");
AssertOrdered("Hello", "Hello1");
AssertOrdered("Hello123", "Hello124");
AssertOrdered("Hello123", "Hello133");
AssertOrdered("Hello123", "Hello223");
AssertOrdered("123", "124");
AssertOrdered("123", "133");
AssertOrdered("123", "223");
AssertOrdered("123", "1234");
AssertOrdered("123", "2345");
AssertOrdered("0", "1");
AssertOrdered("123Hello", "124Hello");
AssertOrdered("123Hello", "133Hello");
AssertOrdered("123Hello", "223Hello");
AssertOrdered("123Hello", "1234Hello");
}
private static void AssertEqual(string x, string y)
{
Assert.Equal(0, NaturalStringComparer.Instance.Compare(x, y));
Assert.Equal(0, NaturalStringComparer.Instance.Compare(y, x));
}
private static void AssertOrdered(string x, string y)
{
Assert.Equal(-1, NaturalStringComparer.Instance.Compare(x, y));
Assert.Equal( 1, NaturalStringComparer.Instance.Compare(y, x));
}
}
I've actually implemented it as an extension method on the StringComparer so that you could do for example:
StringComparer.CurrentCulture.WithNaturalSort() or
StringComparer.OrdinalIgnoreCase.WithNaturalSort().
The resulting IComparer<string> can be used in all places like OrderBy, OrderByDescending, ThenBy, ThenByDescending, SortedSet<string>, etc. And you can still easily tweak case sensitivity, culture, etc.
The implementation is fairly trivial and it should perform quite well even on large sequences.
I've also published it as a tiny NuGet package, so you can just do:
Install-Package NaturalSort.Extension
The code including XML documentation comments and suite of tests is available in the NaturalSort.Extension GitHub repository.
The entire code is this (if you cannot use C# 7 yet, just install the NuGet package):
public static class StringComparerNaturalSortExtension
{
public static IComparer<string> WithNaturalSort(this StringComparer stringComparer) => new NaturalSortComparer(stringComparer);
private class NaturalSortComparer : IComparer<string>
{
public NaturalSortComparer(StringComparer stringComparer)
{
_stringComparer = stringComparer;
}
private readonly StringComparer _stringComparer;
private static readonly Regex NumberSequenceRegex = new Regex(#"(\d+)", RegexOptions.Compiled | RegexOptions.CultureInvariant);
private static string[] Tokenize(string s) => s == null ? new string[] { } : NumberSequenceRegex.Split(s);
private static ulong ParseNumberOrZero(string s) => ulong.TryParse(s, NumberStyles.None, CultureInfo.InvariantCulture, out var result) ? result : 0;
public int Compare(string s1, string s2)
{
var tokens1 = Tokenize(s1);
var tokens2 = Tokenize(s2);
var zipCompare = tokens1.Zip(tokens2, TokenCompare).FirstOrDefault(x => x != 0);
if (zipCompare != 0)
return zipCompare;
var lengthCompare = tokens1.Length.CompareTo(tokens2.Length);
return lengthCompare;
}
private int TokenCompare(string token1, string token2)
{
var number1 = ParseNumberOrZero(token1);
var number2 = ParseNumberOrZero(token2);
var numberCompare = number1.CompareTo(number2);
if (numberCompare != 0)
return numberCompare;
var stringCompare = _stringComparer.Compare(token1, token2);
return stringCompare;
}
}
}
Inspired by Michael Parker's solution, here is an IComparer implementation that you can drop in to any of the linq ordering methods:
private class NaturalStringComparer : IComparer<string>
{
public int Compare(string left, string right)
{
int max = new[] { left, right }
.SelectMany(x => Regex.Matches(x, #"\d+").Cast<Match>().Select(y => (int?)y.Value.Length))
.Max() ?? 0;
var leftPadded = Regex.Replace(left, #"\d+", m => m.Value.PadLeft(max, '0'));
var rightPadded = Regex.Replace(right, #"\d+", m => m.Value.PadLeft(max, '0'));
return string.Compare(leftPadded, rightPadded);
}
}
Here is a naive one-line regex-less LINQ way (borrowed from python):
var alphaStrings = new List<string>() { "10","2","3","4","50","11","100","a12","b12" };
var orderedString = alphaStrings.OrderBy(g => new Tuple<int, string>(g.ToCharArray().All(char.IsDigit)? int.Parse(g) : int.MaxValue, g));
// Order Now: ["2","3","4","10","11","50","100","a12","b12"]
Expanding on a couple of the previous answers and making use of extension methods, I came up with the following that doesn't have the caveats of potential multiple enumerable enumeration, or performance issues concerned with using multiple regex objects, or calling regex needlessly, that being said, it does use ToList(), which can negate the benefits in larger collections.
The selector supports generic typing to allow any delegate to be assigned, the elements in the source collection are mutated by the selector, then converted to strings with ToString().
private static readonly Regex _NaturalOrderExpr = new Regex(#"\d+", RegexOptions.Compiled);
public static IEnumerable<TSource> OrderByNatural<TSource, TKey>(
this IEnumerable<TSource> source, Func<TSource, TKey> selector)
{
int max = 0;
var selection = source.Select(
o =>
{
var v = selector(o);
var s = v != null ? v.ToString() : String.Empty;
if (!String.IsNullOrWhiteSpace(s))
{
var mc = _NaturalOrderExpr.Matches(s);
if (mc.Count > 0)
{
max = Math.Max(max, mc.Cast<Match>().Max(m => m.Value.Length));
}
}
return new
{
Key = o,
Value = s
};
}).ToList();
return
selection.OrderBy(
o =>
String.IsNullOrWhiteSpace(o.Value) ? o.Value : _NaturalOrderExpr.Replace(o.Value, m => m.Value.PadLeft(max, '0')))
.Select(o => o.Key);
}
public static IEnumerable<TSource> OrderByDescendingNatural<TSource, TKey>(
this IEnumerable<TSource> source, Func<TSource, TKey> selector)
{
int max = 0;
var selection = source.Select(
o =>
{
var v = selector(o);
var s = v != null ? v.ToString() : String.Empty;
if (!String.IsNullOrWhiteSpace(s))
{
var mc = _NaturalOrderExpr.Matches(s);
if (mc.Count > 0)
{
max = Math.Max(max, mc.Cast<Match>().Max(m => m.Value.Length));
}
}
return new
{
Key = o,
Value = s
};
}).ToList();
return
selection.OrderByDescending(
o =>
String.IsNullOrWhiteSpace(o.Value) ? o.Value : _NaturalOrderExpr.Replace(o.Value, m => m.Value.PadLeft(max, '0')))
.Select(o => o.Key);
}
A version that's easier to read/maintain.
public class NaturalStringComparer : IComparer<string>
{
public static NaturalStringComparer Instance { get; } = new NaturalStringComparer();
public int Compare(string x, string y) {
const int LeftIsSmaller = -1;
const int RightIsSmaller = 1;
const int Equal = 0;
var leftString = x;
var rightString = y;
var stringComparer = CultureInfo.CurrentCulture.CompareInfo;
int rightIndex;
int leftIndex;
for (leftIndex = 0, rightIndex = 0;
leftIndex < leftString.Length && rightIndex < rightString.Length;
leftIndex++, rightIndex++) {
var leftChar = leftString[leftIndex];
var rightChar = rightString[leftIndex];
var leftIsNumber = char.IsNumber(leftChar);
var rightIsNumber = char.IsNumber(rightChar);
if (!leftIsNumber && !rightIsNumber) {
var result = stringComparer.Compare(leftString, leftIndex, 1, rightString, leftIndex, 1);
if (result != 0) return result;
} else if (leftIsNumber && !rightIsNumber) {
return LeftIsSmaller;
} else if (!leftIsNumber && rightIsNumber) {
return RightIsSmaller;
} else {
var leftNumberLength = NumberLength(leftString, leftIndex, out var leftNumber);
var rightNumberLength = NumberLength(rightString, rightIndex, out var rightNumber);
if (leftNumberLength < rightNumberLength) {
return LeftIsSmaller;
} else if (leftNumberLength > rightNumberLength) {
return RightIsSmaller;
} else {
if(leftNumber < rightNumber) {
return LeftIsSmaller;
} else if(leftNumber > rightNumber) {
return RightIsSmaller;
}
}
}
}
if (leftString.Length < rightString.Length) {
return LeftIsSmaller;
} else if(leftString.Length > rightString.Length) {
return RightIsSmaller;
}
return Equal;
}
public int NumberLength(string str, int offset, out int number) {
if (string.IsNullOrWhiteSpace(str)) throw new ArgumentNullException(nameof(str));
if (offset >= str.Length) throw new ArgumentOutOfRangeException(nameof(offset), offset, "Offset must be less than the length of the string.");
var currentOffset = offset;
var curChar = str[currentOffset];
if (!char.IsNumber(curChar))
throw new ArgumentException($"'{curChar}' is not a number.", nameof(offset));
int length = 1;
var numberString = string.Empty;
for (currentOffset = offset + 1;
currentOffset < str.Length;
currentOffset++, length++) {
curChar = str[currentOffset];
numberString += curChar;
if (!char.IsNumber(curChar)) {
number = int.Parse(numberString);
return length;
}
}
number = int.Parse(numberString);
return length;
}
}
We had a need for a natural sort to deal with text with the following pattern:
"Test 1-1-1 something"
"Test 1-2-3 something"
...
For some reason when I first looked on SO, I didn't find this post and implemented our own. Compared to some of the solutions presented here, while similar in concept, it could have the benefit of maybe being simpler and easier to understand. However, while I did try to look at performance bottlenecks, It is still a much slower implementation than the default OrderBy().
Here is the extension method I implement:
public static class EnumerableExtensions
{
// set up the regex parser once and for all
private static readonly Regex Regex = new Regex(#"\d+|\D+", RegexOptions.Compiled | RegexOptions.Singleline);
// stateless comparer can be built once
private static readonly AggregateComparer Comparer = new AggregateComparer();
public static IEnumerable<T> OrderByNatural<T>(this IEnumerable<T> source, Func<T, string> selector)
{
// first extract string from object using selector
// then extract digit and non-digit groups
Func<T, IEnumerable<IComparable>> splitter =
s => Regex.Matches(selector(s))
.Cast<Match>()
.Select(m => Char.IsDigit(m.Value[0]) ? (IComparable) int.Parse(m.Value) : m.Value);
return source.OrderBy(splitter, Comparer);
}
/// <summary>
/// This comparer will compare two lists of objects against each other
/// </summary>
/// <remarks>Objects in each list are compare to their corresponding elements in the other
/// list until a difference is found.</remarks>
private class AggregateComparer : IComparer<IEnumerable<IComparable>>
{
public int Compare(IEnumerable<IComparable> x, IEnumerable<IComparable> y)
{
return
x.Zip(y, (a, b) => new {a, b}) // walk both lists
.Select(pair => pair.a.CompareTo(pair.b)) // compare each object
.FirstOrDefault(result => result != 0); // until a difference is found
}
}
}
The idea is to split the original strings into blocks of digits and non-digits ("\d+|\D+"). Since this is a potentially expensive task, it is done only once per entry. We then use a comparer of comparable objects (sorry, I can't find a more proper way to say it). It compares each block to its corresponding block in the other string.
I would like feedback on how this could be improved and what the major flaws are. Note that maintainability is important to us at this point and we are not currently using this in extremely large data sets.
Let me explain my problem and how i was able to solve it.
Problem:- Sort files based on FileName from FileInfo objects which are retrieved from a Directory.
Solution:- I selected the file names from FileInfo and trimed the ".png" part of the file name. Now, just do List.Sort(), which sorts the filenames in Natural sorting order. Based on my testing i found that having .png messes up sorting order. Have a look at the below code
var imageNameList = new DirectoryInfo(#"C:\Temp\Images").GetFiles("*.png").Select(x =>x.Name.Substring(0, x.Name.Length - 4)).ToList();
imageNameList.Sort();
I have a problem to solve where given a string source and a collection of search criteria criteria, the algorithm has to return the shortest possible substring of source that contains all items of criteria.
=================================
UPDATE
The same search criteria might be in the source string multiple
times. In that case, it is required to return the sub-string
containing the particular instance of the search criteria such that
it is the shortest among all possible sub-strings.
The search items can contain spaces in them such as hello world
The order in which the search criteria are found does not matter as long as they are all in the resultant sub-string
==================================
String source = "aaa wwwww fgffsd ththththt sss sgsgsgsghs bfbfb hhh sdfg kkk dhdhtrherhrhrthrthrt ddfhdetehehe kkk wdwd aaa vcvc hhh zxzx sss nbnbn";
List<String> criteria = new List<string> { "kkk", "aaa", "sss", "hhh" };
The input above should return the following substring: kkk wdwd aaa vcvc hhh zxzx sss
Unfortunately, I spent a lot of time trying to write such an algorithm but I couldn't get it just right. Below is the code I have got so far:
public struct Extraction
{
public int Start { get; set; }
public int End { get; set; }
public int Length
{
get
{
var length = this.End - this.Start;
return length;
}
}
public Extraction(int start, int end)
{
this.Start = start;
this.End = end;
}
}
public class TextExtractor
{
private String _source;
private Dictionary<String, List<Int32>> _criteriaIndexes;
private Dictionary<String, int> _entryIndex;
public TextExtractor(String source, List<String> searchCriteria)
{
this._source = source;
this._criteriaIndexes = this.ExtractIndexes(source, searchCriteria);
this._entryIndex = _criteriaIndexes.ToDictionary(x => x.Key, v => 0);
}
public String Extract()
{
List<Extraction> possibleExtractions = new List<Extraction>();
int index = 0;
int min = int.MaxValue;
int max = 0;
bool shouldStop = false;
while (index < _criteriaIndexes.Count && !shouldStop)
{
Boolean compareWithAll = index == _criteriaIndexes.Count - 1;
if (!compareWithAll)
{
var current = _criteriaIndexes.ElementAt(index);
this.CalculateMinMax(current, ref min, ref max);
index++;
}
else
{
var entry = _criteriaIndexes.Last();
while (_entryIndex[entry.Key] < entry.Value.Count)
{
int a = min;
int b = max;
this.CalculateMinMax(entry, ref a, ref b);
_entryIndex[entry.Key]++;
Extraction ext = new Extraction(a, b);
possibleExtractions.Add(ext);
}
int k = index - 1;
while (k >= 0)
{
var prev = _criteriaIndexes.ElementAt(k);
if (prev.Value.Count - 1 > _entryIndex[prev.Key])
{
_entryIndex[prev.Key]++;
break;
}
else
{
k--;
}
}
shouldStop = _criteriaIndexes.All(x => x.Value.Count - 1 <= _entryIndex[x.Key]);
_entryIndex[entry.Key] = 0;
index = 0;
min = int.MaxValue;
max = 0;
}
}
Extraction shortest = possibleExtractions.First(x => x.Length.Equals(possibleExtractions.Min(p => p.Length)));
String result = _source.Substring(shortest.Start, shortest.Length);
return result;
}
private Dictionary<String, List<Int32>> ExtractIndexes(String source, List<String> searchCriteria)
{
Dictionary<String, List<Int32>> result = new Dictionary<string, List<int>>();
foreach (var criteria in searchCriteria)
{
Int32 i = 0;
Int32 startingIndex = 0;
var indexes = new List<int>();
while (i > -1)
{
i = source.IndexOf(criteria, startingIndex);
if (i > -1)
{
startingIndex = i + 1;
indexes.Add(i);
}
}
if (indexes.Any())
{
result.Add(criteria, indexes);
}
}
return result;
}
private void CalculateMinMax(KeyValuePair<String, List<int>> current, ref int min, ref int max)
{
int j = current.Value[_entryIndex[current.Key]];
if (j < min)
{
min = j;
}
int indexPlusWordLength = j + current.Key.Length;
if (indexPlusWordLength > max)
{
max = indexPlusWordLength;
}
}
}
I would appreciate it if someone could point out where did I go wrong in my algorithm. Moreover, I kinda feel this is a very naive implementation. Maybe there is a better approach to solve this problem than trying to try out combinations of indexes?
Thanks!
This is a much simpler algorithm that will give you the shortest substring.
void Main()
{
String source = "aaa wwwww fgffsd ththththt sss ww sgsgsgsghs bfbfb hhh sdfg kkk " +
"dhdhtrherhrhrthrthrt ddfhdetehehe kkk wdwd aaa vcvc hhh zxzx sss ww nbnbn";
List<String> criteria = new List<string> { "kkk", "aaa", "sss ww", "hhh" };
var result = GetAllSubstringContainingCriteria(source, criteria)
.OrderBy(sub => sub.Length).FirstOrDefault();
// result is "kkk wdwd aaa vcvc hhh zxzx sss ww"
}
private IEnumerable<string> GetAllSubstringContainingCriteria(
string source, List<string> criteria)
{
for (int i = 0; i < source.Length; i++)
{
var subString = source.Substring(i);
if (criteria.Any(crit => subString.StartsWith(crit)))
{
var lastWordIndex =
GetLastCharacterIndexFromLastCriteriaInSubstring(subString, criteria);
if (lastWordIndex >= 0)
yield return string.Join(" ", subString.Substring(0, lastWordIndex));
}
else
continue;
}
}
private int GetLastCharacterIndexFromLastCriteriaInSubstring(
string subString, List<string> criteria)
{
var results = criteria.Select(crit => new {
index = subString.IndexOf(crit),
criteria = crit});
return results.All(result => result.index >= 0)
? results.Select(result => result.index + result.criteria.Length).Max()
: -1;
}
Let the Java built-in classes do the work. How about converting your criteria to a regular expression Pattern. If the criteria are X or Y or Z . . ., convert this into a regular expression of the form "(X)|(Y)|(Z)|...", compile it, and execute it against the source string.
This, of course, returns the leftmost match. You could code a very straightforward loop that iterates across all occurrences, caches them, and chooses the shortest--or the leftmost shortest--or, if two or more are equally short, then all of those.
I'm currently in the process of writing a class that can represent an infinitely large number (in theory). The constructor of this class creates the object from a string value, which is why the number could be of an extremely large, yet unknown, size.
The reason I started writing this class was because I wanted to be able to make a program that would be able to perform mathematical calculations with numbers of arbitrarily large size. Thus, I started writing a class that could handle values well over the standard ranges of integers, floats, doubles, (hopefully) decimals, etc.
Here are the declarations and the main constructor for the class:
/// <summary>
/// Creates a new instance of the LargeDecimal class, which represents either a whole or decimal number.
/// </summary>
/// <param name="number">The string representation of the number.</param>
public LargeDecimal(string value)
{
string number = value.Replace(" ", "");
if (number.Contains("-") && (number.IndexOf('-') == 0)) {
number = number.Replace("-", "");
IsNegative = true;
}
// Determining whether the number is whole or contains a decimal.
if (number.IndexOf('.') == -1) {
// Does not contain a decimal.
for (int i = 0; i < number.Length; i++)
wholeDigits.Add(int.Parse(number[i].ToString()));
IsWhole = true;
}
else {
// Still check if number is whole. Add all decimal digits.
string[] numArray = number.Split('.');
int sumOfDecimalDigits = 0;
for (int i = 0; i < numArray[1].ToString().Length; i++)
sumOfDecimalDigits += int.Parse(numArray[1].ToString()[i].ToString());
if (sumOfDecimalDigits <= 0) {
// Is a whole number.
for (int i = 0; i < numArray[0].ToString().Length; i++)
wholeDigits.Add(int.Parse(numArray[0].ToString()[i].ToString()));
IsWhole = true;
}
else {
// Is not a whole number.
for (int i = 0; i < numArray[0].ToString().Length; i++)
wholeDigits.Add(int.Parse(numArray[0].ToString()[i].ToString()));
for (int i = 0; i < numArray[1].ToString().Length; i++)
decimalDigits.Add(int.Parse(numArray[1].ToString()[i].ToString()));
IsWhole = false;
}
}
}
The class is basically a representation of a number through two lists of type int, where one list represents the digits that make up the whole partition of the number, and the other list represents the digits that make up the decimal partition of the number (if applicable).
I have written an Add method which accepts two LargeDecimal objects, adds their values together, and returns a new LargeDecimal object with the sum as its value. Though incomplete, it does work with LargeDecimal objects that are whole numbers only, and are both positive or both negative (picture!).
I have realized that adding methods to compare two values (greater than / less than / equal to) would be extremely useful in calculations. However, I am not sure how to check whether the value of a LargeDecimal object is greater or less than the value of another LargeDecimal.
There are cases where I can just compare the amount of items in the wholeDigits list, but that is only when the amounts of items are different for both values.
I am unsure about how to compare two numbers such as: 15498765423654973246 and 15499111137583924246.
And I think it will get more difficult if I will try and compare two fractional numbers: 8573819351.86931 and 8573809999.85999
I do not wish to use integer calculations in conjunction with place values (e.g. in the number 831, the value of the number 8 would be 8 * 100, the value of 3 would be 3 * 10, and the value of 1 would be 1 * 1), because I would like this class to be able to represent values of any given size and length and range (while an int cannot handle values up to 2147483647).
Any help regarding this would be highly appreciated! Thank you all!
I would start by implementing IComparable:
public class LargeDecimal : IComparable<LargeDecimal>
And the implementation would look like:
public int CompareTo(LargeDecimal other)
{
if (other == null) return 1;
if (ReferenceEquals(this, other)) return 0;
if (IsNegative != other.IsNegative)
{
if (other.IsNegative) return 1;
return -1;
}
int multiplier = (IsNegative) ? -1 : 1;
if (wholeDigits.Count > other.wholeDigits.Count) return 1 * multiplier;
if (wholeDigits.Count < other.wholeDigits.Count) return -1 * multiplier;
for (int i = 0; i < wholeDigits.Count; i++)
{
if (wholeDigits[i] > other.wholeDigits[i]) return 1 * multiplier;
if (wholeDigits[i] < other.wholeDigits[i]) return -1 * multiplier;
}
for (int i = 0; i < Math.Min(decimalDigits.Count, other.decimalDigits.Count); i++)
{
if (decimalDigits[i] > other.decimalDigits[i]) return 1 * multiplier;
if (decimalDigits[i] < other.decimalDigits[i]) return -1 * multiplier;
}
if (decimalDigits.Count > other.decimalDigits.Count) return 1 * multiplier;
if (decimalDigits.Count < other.decimalDigits.Count) return -1 * multiplier;
return 0;
}
Update
This project was sitting on my brain at dinner tonight, so I went at it some more for fun. Not sure if this is helpful, but figured I'd share what I came up with.
First, I added fields to make the class actually work:
public bool IsNegative { get; private set; }
public bool IsWhole { get; private set; }
private List<int> wholeDigits;
private List<int> decimalDigits;
Second, I overrode the ToString method so the numbers display nicely:
public override string ToString()
{
return string.Format("{0}{1}{2}{3}",
(IsNegative) ? "-" : "",
string.Join("", wholeDigits),
(IsWhole) ? "" : ".",
(IsWhole) ? "" : string.Join("", decimalDigits));
}
Then I implemented the Equals methods so they work as expected for a number type:
public static bool Equals(LargeDecimal first, LargeDecimal second)
{
return ReferenceEquals(first, null)
? ReferenceEquals(second, null)
: first.Equals(second);
}
public override bool Equals(object obj)
{
return Equals(obj as LargeDecimal);
}
protected bool Equals(LargeDecimal other)
{
return CompareTo(other) == 0;
}
public override int GetHashCode()
{
unchecked
{
var hashCode = (wholeDigits != null)
? wholeDigits.GetHashCode()
: 0;
hashCode = (hashCode * 397) ^
(decimalDigits != null ? decimalDigits.GetHashCode() : 0);
hashCode = (hashCode * 397) ^ IsNegative.GetHashCode();
hashCode = (hashCode * 397) ^ IsWhole.GetHashCode();
return hashCode;
}
}
Next, I added some utility methods to help out with some upcoming tasks:
private void ResetToZero()
{
wholeDigits = new List<int> { 0 };
decimalDigits = new List<int> { 0 };
IsWhole = true;
IsNegative = false;
}
private void NormalizeLists()
{
RemoveLeadingZeroes(wholeDigits);
RemoveTrailingZeroes(decimalDigits);
IsWhole = (decimalDigits.Count == 0
|| (decimalDigits.Count == 1 && decimalDigits[0] == 0));
}
private void AddLeadingZeroes(List<int> list, int numberOfZeroes)
{
if (list == null) return;
for (int i = 0; i < numberOfZeroes; i++)
{
list.Insert(0, 0);
}
}
private void AddTrailingZeroes(List<int> list, int numberOfZeroes)
{
if (list == null) return;
for (int i = 0; i < numberOfZeroes; i++)
{
list.Add(0);
}
}
private void RemoveLeadingZeroes(List<int> list, bool leaveOneIfEmpty = true)
{
if (list == null) return;
var temp = list;
for (int i = 0; i < temp.Count; i++)
{
if (temp[i] == 0)
{
list.RemoveAt(i);
}
else
{
break;
}
}
if (leaveOneIfEmpty && !list.Any()) list.Add(0);
}
private void RemoveTrailingZeroes(List<int> list, bool leaveOneIfEmpty = true)
{
if (list == null) return;
var temp = list;
for (int i = temp.Count -1; i >= 0; i--)
{
if (temp[i] == 0)
{
list.RemoveAt(i);
}
else
{
break;
}
}
if (leaveOneIfEmpty && !list.Any()) list.Add(0);
}
Next, I added some constructors. A default that sets the number to '0', one that parses a string, and another that copies the values from another LargeDecimal:
public LargeDecimal() : this("0") { }
public LargeDecimal(string value)
{
if (value == null) throw new ArgumentNullException("value");
string number = value.Replace(" ", ""); // remove spaces
number = number.TrimStart('0'); // remove leading zeroes
IsNegative = (number.IndexOf('-') == 0); // check for negative
number = number.Replace("-", ""); // remove dashes
// add a zero if there were no numbers before a decimal point
if (number.IndexOf('.') == 0) number = "0" + number;
// Initialize lists
wholeDigits = new List<int>();
decimalDigits = new List<int>();
// Get whole and decimal parts of the number
var numberParts = number.Split(new[] {'.'},
StringSplitOptions.RemoveEmptyEntries);
IsWhole = numberParts.Length == 1;
// Add whole digits to the list
wholeDigits.AddRange(numberParts[0].Select(n => int.Parse(n.ToString())));
// Add decimal digits to the list (if there are any)
if (numberParts.Length > 1 &&
numberParts[1].Sum(n => int.Parse(n.ToString())) > 0)
{
numberParts[1] = numberParts[1].TrimEnd('0');
decimalDigits.AddRange(numberParts[1].Select(n => int.Parse(n.ToString())));
}
NormalizeLists();
}
public LargeDecimal(LargeDecimal initializeFrom)
{
wholeDigits = initializeFrom.wholeDigits
.GetRange(0, initializeFrom.wholeDigits.Count);
decimalDigits = initializeFrom.decimalDigits
.GetRange(0, initializeFrom.decimalDigits.Count);
IsWhole = initializeFrom.IsWhole;
IsNegative = initializeFrom.IsNegative;
NormalizeLists();
}
Then I implemented the Add and Subtract methods
public void Add(LargeDecimal other)
{
if (other == null) return;
if (IsNegative != other.IsNegative)
{
// Get the absolue values of the two operands
var absThis = new LargeDecimal(this) {IsNegative = false};
var absOther = new LargeDecimal(other) {IsNegative = false};
// If the signs are different and the values are the same, reset to 0.
if (absThis == absOther)
{
ResetToZero();
return;
}
// Since the signs are different, we will retain the sign of the larger number
IsNegative = absThis < absOther ? other.IsNegative : IsNegative;
// Assign the difference of the two absolute values
absThis.Subtract(absOther);
wholeDigits = absThis.wholeDigits.GetRange(0, absThis.wholeDigits.Count);
decimalDigits = absThis.decimalDigits.GetRange(0, absThis.decimalDigits.Count);
NormalizeLists();
return;
}
// start with the larger decimal digits list
var newDecimalDigits = new List<int>();
newDecimalDigits = decimalDigits.Count > other.decimalDigits.Count
? decimalDigits.GetRange(0, decimalDigits.Count)
: other.decimalDigits.GetRange(0, other.decimalDigits.Count);
// and add the smaller one to it
int carry = 0; // Represents the value of the 'tens' digit to carry over
for (int i = Math.Min(decimalDigits.Count, other.decimalDigits.Count) - 1; i >= 0; i--)
{
var result = decimalDigits[i] + other.decimalDigits[i] + carry;
carry = Convert.ToInt32(Math.Floor((decimal) result / 10));
result = result % 10;
newDecimalDigits[i] = result;
}
var newWholeDigits = new List<int>();
newWholeDigits = wholeDigits.Count > other.wholeDigits.Count
? wholeDigits.GetRange(0, wholeDigits.Count)
: other.wholeDigits.GetRange(0, other.wholeDigits.Count);
for (int i = Math.Min(wholeDigits.Count, other.wholeDigits.Count) - 1; i >= 0; i--)
{
var result = wholeDigits[i] + other.wholeDigits[i] + carry;
carry = Convert.ToInt32(Math.Floor((decimal)result / 10));
result = result % 10;
newWholeDigits[i] = result;
}
if (carry > 0) newWholeDigits.Insert(0, carry);
wholeDigits = newWholeDigits.GetRange(0, newWholeDigits.Count);
decimalDigits = newDecimalDigits.GetRange(0, newDecimalDigits.Count);
NormalizeLists();
}
public void Subtract(LargeDecimal other)
{
if (other == null) return;
// If the other value is the same as this one, then the difference is zero
if (Equals(other))
{
ResetToZero();
return;
}
// Absolute values will be used to determine how we subtract
var absThis = new LargeDecimal(this) {IsNegative = false};
var absOther = new LargeDecimal(other) {IsNegative = false};
// If the signs are different, then the difference will be the sum
if (IsNegative != other.IsNegative)
{
absThis.Add(absOther);
wholeDigits = absThis.wholeDigits.GetRange(0, absThis.wholeDigits.Count);
decimalDigits = absThis.decimalDigits.GetRange(0, absThis.decimalDigits.Count);
NormalizeLists();
return;
}
// Subtract smallNumber from bigNumber to get the difference
LargeDecimal bigNumber;
LargeDecimal smallNumber;
if (absThis < absOther)
{
bigNumber = new LargeDecimal(absOther);
smallNumber = new LargeDecimal(absThis);
}
else
{
bigNumber = new LargeDecimal(absThis);
smallNumber = new LargeDecimal(absOther);
}
// Pad the whole number and decimal number lists where necessary so that both
// LargeDecimal objects have the same count of whole and decimal numbers.
AddTrailingZeroes(
bigNumber.decimalDigits.Count < smallNumber.decimalDigits.Count
? bigNumber.decimalDigits
: smallNumber.decimalDigits,
Math.Abs(bigNumber.decimalDigits.Count - smallNumber.decimalDigits.Count));
AddLeadingZeroes(smallNumber.wholeDigits,
Math.Abs(bigNumber.wholeDigits.Count - smallNumber.wholeDigits.Count));
var newWholeDigits = new List<int>();
var newDecimalDigits = new List<int>();
bool borrowed = false; // True if we borrowed 1 from next number
for (int i = bigNumber.decimalDigits.Count - 1; i >= 0; i--)
{
if (borrowed)
{
bigNumber.decimalDigits[i] -= 1; // We borrowed one from this number last time
borrowed = false;
}
if (bigNumber.decimalDigits[i] < smallNumber.decimalDigits[i])
{
bigNumber.decimalDigits[i] += 10; // Borrow from next number and add to this one
borrowed = true;
}
// Since we're working from the back of the list, always add to the front
newDecimalDigits.Insert(0, bigNumber.decimalDigits[i] - smallNumber.decimalDigits[i]);
}
for (int i = bigNumber.wholeDigits.Count - 1; i >= 0; i--)
{
if (borrowed)
{
bigNumber.wholeDigits[i] -= 1;
borrowed = false;
}
if (bigNumber.wholeDigits[i] < smallNumber.wholeDigits[i])
{
bigNumber.wholeDigits[i] += 10;
borrowed = true;
}
newWholeDigits.Insert(0, bigNumber.wholeDigits[i] - smallNumber.wholeDigits[i]);
}
if (absThis < absOther) IsNegative = !IsNegative;
wholeDigits = newWholeDigits.GetRange(0, newWholeDigits.Count);
decimalDigits = newDecimalDigits.GetRange(0, newDecimalDigits.Count);
NormalizeLists();
}
And finally overrode the numeric operators:
public static LargeDecimal operator +(LargeDecimal first, LargeDecimal second)
{
if (first == null) return second;
if (second == null) return first;
var result = new LargeDecimal(first);
result.Add(second);
return result;
}
public static LargeDecimal operator -(LargeDecimal first, LargeDecimal second)
{
if (first == null) return second;
if (second == null) return first;
var result = new LargeDecimal(first);
result.Subtract(second);
return result;
}
public static bool operator >(LargeDecimal first, LargeDecimal second)
{
if (first == null) return false;
return first.CompareTo(second) > 0;
}
public static bool operator <(LargeDecimal first, LargeDecimal second)
{
if (second == null) return false;
return second.CompareTo(first) > 0;
}
public static bool operator >=(LargeDecimal first, LargeDecimal second)
{
if (first == null) return false;
return first.CompareTo(second) >= 0;
}
public static bool operator <=(LargeDecimal first, LargeDecimal second)
{
if (second == null) return false;
return second.CompareTo(first) >= 0;
}
public static bool operator ==(LargeDecimal first, LargeDecimal second)
{
return Equals(first, second);
}
public static bool operator !=(LargeDecimal first, LargeDecimal second)
{
return !Equals(first, second);
}
Thanks for the fun challenge!!
Assuming that this implementation looks something like this:
List<int> WholeList;
List<int> FactionalList;
bool IsNegative;
and they both grow away from the decimal point, then a comparison algorithm would go like this
First compare signs. Negative is always less than positive.
Compare lengths of WholeList, longer has larger magnitude (larger number is dependent on sign)
If WholeList.Count the same. Compare each digit starting with most significant (aka WholeList[Count-1] first), first that are different between numbers will determine larger magnitude.
If you make it into the FractionalList, and then run out of digits in one list. The number with the longer FractionalList will be the larger magnitude.
i have a class:
public class Essay
{
public int ID{get;set;}
public string Name{get;set;}
}
and list of Essay type
List<Essay> essays=new List<Essay>();
on the name property contains numbers and letters.
i want to sort the list by the name property
for example:
essays=
{1,"ccccc"},
{2,"aaaa"},
{3,"bbbb"},
{4,"10"},
{5,"1"},
{6,"2"},
{7,"1a"}
i want to sort:
essays=
{2,"aaaa"},
{3,"bbbb"},
{1,"ccccc"},
{5,"1"},
{7,"1a"},
{6,"2"},
{4,"10"}
how i do it?
thank to all.
The name mixes characters and numbers, which leads to a awkward sorting if you simply sort alphabetically.
What you more seems to prefer is called naturally sorting, where the numbers within the text are really recognized as a number and the sort short apply their on a numeric basis.
A few implementations of this algorithm can be found on Jeffs webpage.
The one i most prefer is the one from Dave:
/*
* The Alphanum Algorithm is an improved sorting algorithm for strings
* containing numbers. Instead of sorting numbers in ASCII order like
* a standard sort, this algorithm sorts numbers in numeric order.
*
* The Alphanum Algorithm is discussed at http://www.DaveKoelle.com
*
* Based on the Java implementation of Dave Koelle's Alphanum algorithm.
* Contributed by Jonathan Ruckwood <jonathan.ruckwood#gmail.com>
*
* Adapted by Dominik Hurnaus <dominik.hurnaus#gmail.com> to
* - correctly sort words where one word starts with another word
* - have slightly better performance
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
using System;
using System.Collections;
using System.Text;
/*
* Please compare against the latest Java version at http://www.DaveKoelle.com
* to see the most recent modifications
*/
namespace AlphanumComparator
{
public class AlphanumComparator : IComparer
{
private enum ChunkType {Alphanumeric, Numeric};
private bool InChunk(char ch, char otherCh)
{
ChunkType type = ChunkType.Alphanumeric;
if (char.IsDigit(otherCh))
{
type = ChunkType.Numeric;
}
if ((type == ChunkType.Alphanumeric && char.IsDigit(ch))
|| (type == ChunkType.Numeric && !char.IsDigit(ch)))
{
return false;
}
return true;
}
public int Compare(object x, object y)
{
String s1 = x as string;
String s2 = y as string;
if (s1 == null || s2 == null)
{
return 0;
}
int thisMarker = 0, thisNumericChunk = 0;
int thatMarker = 0, thatNumericChunk = 0;
while ((thisMarker < s1.Length) || (thatMarker < s2.Length))
{
if (thisMarker >= s1.Length)
{
return -1;
}
else if (thatMarker >= s2.Length)
{
return 1;
}
char thisCh = s1[thisMarker];
char thatCh = s2[thatMarker];
StringBuilder thisChunk = new StringBuilder();
StringBuilder thatChunk = new StringBuilder();
while ((thisMarker < s1.Length) && (thisChunk.Length==0 ||InChunk(thisCh, thisChunk[0])))
{
thisChunk.Append(thisCh);
thisMarker++;
if (thisMarker < s1.Length)
{
thisCh = s1[thisMarker];
}
}
while ((thatMarker < s2.Length) && (thatChunk.Length==0 ||InChunk(thatCh, thatChunk[0])))
{
thatChunk.Append(thatCh);
thatMarker++;
if (thatMarker < s2.Length)
{
thatCh = s2[thatMarker];
}
}
int result = 0;
// If both chunks contain numeric characters, sort them numerically
if (char.IsDigit(thisChunk[0]) && char.IsDigit(thatChunk[0]))
{
thisNumericChunk = Convert.ToInt32(thisChunk.ToString());
thatNumericChunk = Convert.ToInt32(thatChunk.ToString());
if (thisNumericChunk < thatNumericChunk)
{
result = -1;
}
if (thisNumericChunk > thatNumericChunk)
{
result = 1;
}
}
else
{
result = thisChunk.ToString().CompareTo(thatChunk.ToString());
}
if (result != 0)
{
return result;
}
}
return 0;
}
}
}
There are several elements to the answer.
The first part is being able to in-place sort a List using Sort() and a lambda comparison method. That's solved by using an extension method for IList and a helper "ComparisonDelegator" class. Combining those, it's possible to pass a lambda to List.Sort().
The second part has been addressed in another post here (which I have upvoted) and the code from which I have shamelessly pasted into the AlphanumComparator class in this answer.
(As a side note, I should point out that all the Linq examples posted elsewhere in this thread make a COPY of the list. This is fine for short lists, but if you have a long list it can cause performance problems. The solution presented here does NOT make a copy of the list.)
Putting it all together, we get the following code, which outputs:
ID=2, Name=aaaa
ID=3, Name=bbbb
ID=1, Name=ccccc
ID=5, Name=1
ID=7, Name=1a
ID=6, Name=2
ID=4, Name=10
And the full code sample (compilable as a console application):
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace Demo
{
public static class Program
{
public static void Main()
{
var list = new List<Essay>
{
new Essay {ID=1, Name="ccccc"},
new Essay {ID=2, Name="aaaa"},
new Essay {ID=3, Name="bbbb"},
new Essay {ID=4, Name="10"},
new Essay {ID=5, Name="1"},
new Essay {ID=6, Name="2"},
new Essay {ID=7, Name="1a"}
};
var comp = new AlphanumComparator();
list.Sort((lhs, rhs) => comp.Compare(lhs.Name, rhs.Name));
foreach (var essay in list)
{
Console.WriteLine("ID={0}, Name={1}", essay.ID, essay.Name);
}
}
}
public class Essay
{
public int ID
{
get;
set;
}
public string Name
{
get;
set;
}
}
/// <summary>Extensions for IList{T}</summary>
public static class ListExt
{
/// <summary> Sorts an IList{T} in place. </summary>
public static void Sort<T>(this IList<T> list, Comparison<T> comparison)
{
ArrayList.Adapter((IList)list).Sort(new ComparisonDelegator<T>(comparison));
}
}
/// <summary>
/// Provides a mechanism for easily converting a Comparison<> delegate (or lambda) to an IComparer<>.
/// This can be used for List.BinarySearch(), for example.
/// </summary>
/// <typeparam name="T">The type of items to be compared.</typeparam>
public sealed class ComparisonDelegator<T>: IComparer<T>, IComparer
{
/// <summary>Create from a Comparison<> delegate.</summary>
/// <param name="comparison">A Comparison<> delegate.</param>
public ComparisonDelegator(Comparison<T> comparison)
{
this._comparison = comparison;
}
/// <summary>Implements the IComparer.Compare() method.</summary>
public int Compare(T x, T y)
{
return _comparison(x, y);
}
/// <summary>Implements the IComparer.Compare() method.</summary>
public int Compare(object x, object y)
{
return _comparison((T)x, (T)y);
}
/// <summary>Used to store the Comparison delegate.</summary>
private readonly Comparison<T> _comparison;
}
/// <summary>
/// Special class to sort strings "naturally",
/// but to place non-numeric items *before* numeric items.
/// </summary>
public class AlphanumComparator : IComparer
{
private enum ChunkType {Alphanumeric, Numeric};
private bool InChunk(char ch, char otherCh)
{
ChunkType type = ChunkType.Alphanumeric;
if (char.IsDigit(otherCh))
{
type = ChunkType.Numeric;
}
if ((type == ChunkType.Alphanumeric && char.IsDigit(ch))
|| (type == ChunkType.Numeric && !char.IsDigit(ch)))
{
return false;
}
return true;
}
public int Compare(object x, object y)
{
String s1 = x as string;
String s2 = y as string;
if (s1 == null || s2 == null)
{
return 0;
}
int thisMarker = 0, thisNumericChunk = 0;
int thatMarker = 0, thatNumericChunk = 0;
while ((thisMarker < s1.Length) || (thatMarker < s2.Length))
{
if (thisMarker >= s1.Length)
{
return -1;
}
else if (thatMarker >= s2.Length)
{
return 1;
}
char thisCh = s1[thisMarker];
char thatCh = s2[thatMarker];
StringBuilder thisChunk = new StringBuilder();
StringBuilder thatChunk = new StringBuilder();
while ((thisMarker < s1.Length) && (thisChunk.Length==0 ||InChunk(thisCh, thisChunk[0])))
{
thisChunk.Append(thisCh);
thisMarker++;
if (thisMarker < s1.Length)
{
thisCh = s1[thisMarker];
}
}
while ((thatMarker < s2.Length) && (thatChunk.Length==0 ||InChunk(thatCh, thatChunk[0])))
{
thatChunk.Append(thatCh);
thatMarker++;
if (thatMarker < s2.Length)
{
thatCh = s2[thatMarker];
}
}
int result = 0;
// If both chunks contain numeric characters, sort them numerically
if (char.IsDigit(thisChunk[0]) && char.IsDigit(thatChunk[0]))
{
thisNumericChunk = Convert.ToInt32(thisChunk.ToString());
thatNumericChunk = Convert.ToInt32(thatChunk.ToString());
if (thisNumericChunk < thatNumericChunk)
{
result = -1;
}
if (thisNumericChunk > thatNumericChunk)
{
result = 1;
}
}
else if (char.IsDigit(thisChunk[0]) && !char.IsDigit(thatChunk[0]))
{
return 1; // Ensure that non-numeric sorts before numeric.
}
else if (!char.IsDigit(thisChunk[0]) && char.IsDigit(thatChunk[0]))
{
return -1; // Ensure that non-numeric sorts before numeric.
}
else
{
result = thisChunk.ToString().CompareTo(thatChunk.ToString());
}
if (result != 0)
{
return result;
}
}
return 0;
}
}
}
You may just change numeric chars with chars with higher ascii values and do a orderby and revert back.
List<Essay> essays = new List<Essay>();
essays.Add(new Essay(){ID = 1, Name = "ccccc"});
essays.Add(new Essay(){ID = 2, Name = "aaaa"});
essays.Add(new Essay(){ID = 3, Name = "bbbb"});
essays.Add(new Essay(){ID = 4, Name = "10"});
essays.Add(new Essay(){ID = 5, Name = "1"});
essays.Add(new Essay(){ID = 6, Name = "2"});
essays.Add(new Essay(){ID = 7, Name = "1a"});
essays.ForEach(q => Replace(q));
var result = essays.OrderBy(q => q.Name).ToList();
result.ForEach(q => Revert(q));
Related Functions:
public void Replace(Essay x)
{
x.Name = x.Name.Replace('0', ((char)240));
x.Name = x.Name.Replace('1', ((char)241));
x.Name = x.Name.Replace('2', ((char)242));
x.Name = x.Name.Replace('3', ((char)243));
x.Name = x.Name.Replace('4', ((char)244));
x.Name = x.Name.Replace('5', ((char)245));
x.Name = x.Name.Replace('6', ((char)246));
x.Name = x.Name.Replace('7', ((char)247));
x.Name = x.Name.Replace('8', ((char)248));
x.Name = x.Name.Replace('9', ((char)249));
}
public void Revert(Essay x)
{
x.Name = x.Name.Replace(((char)240), '0');
x.Name = x.Name.Replace(((char)241), '1');
x.Name = x.Name.Replace(((char)242), '2');
x.Name = x.Name.Replace(((char)243), '3');
x.Name = x.Name.Replace(((char)244), '4');
x.Name = x.Name.Replace(((char)245), '5');
x.Name = x.Name.Replace(((char)246), '6');
x.Name = x.Name.Replace(((char)247), '7');
x.Name = x.Name.Replace(((char)248), '8');
x.Name = x.Name.Replace(((char)249), '9');
}
I have created an IComparer implementation and use OrderBy to sort the list. I use a regex to capture the digits at the start of the Name and if they exist use those to order the items, if not then just compare the names using a string comparison. A name starting with a number always comes after all names starting with letters. It gives the output you have requested.
public class EssayNameComparer : IComparer<string>
{
private Regex _digits = new Regex("(\\d+)(.*)");
public int Compare(string a, string b)
{
Match matcha = _digits.Match(a);
Match matchb = _digits.Match(b);
if (matcha.Success && matchb.Success)
{
int numa = int.Parse(matcha.Groups[1].Value);
int numb = int.Parse(matchb.Groups[1].Value);
return numa.CompareTo(numb);
}
else if (matcha.Success)
{
return 1;
}
else if (matchb.Success)
{
return -1;
}
else
{
return string.Compare(a, b);
}
}
}
public class Program
{
public static void Main(string[] args)
{
List<Essay> essays= new List<Essay>() {
new Essay { ID = 1, Name = "ccccc"},
new Essay { ID = 2, Name = "aaaa"},
new Essay { ID = 3, Name = "bbbb"},
new Essay { ID = 4, Name = "10"},
new Essay { ID = 5, Name = "1"},
new Essay { ID = 6, Name = "2"},
new Essay { ID = 7, Name = "1a"}
};
foreach(Essay essay in essays.OrderBy(e => e.Name, new EssayNameComparer()))
{
Console.WriteLine(essay.Name);
}
}
}
Output:
aaaa
bbbb
ccccc
1
1a
2
10