Comparison of multi-part alphanumeric strings - c#

I receive a string which contains the sw version currently running in the system.
I want to do some operations only if the system is running on a certain sw version or
later.
e.g. If system is running sw version 2.D or later (2.E, ..) I do some operations. If system is running lower sw version (2.C, ..), then I don't do it.
How to do this comparison for strings?

There are many ways of doing this, but if you know that the string is in the form x.y or even x.y.z... then I would suggest using a custom StringComparer:
using System;
using System.Linq;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace StingComparisons
{
/// <summary>
/// Unit tests for the dot-delimited version comparison helpers
/// (<c>IsSameOrAfter</c> and <c>DotDelimitedIsSameOrAfter</c>).
/// </summary>
[TestClass]
public class StringComparison
{
    /// <summary>
    /// Two-part versions compared against "2.E" through an explicitly supplied comparer.
    /// </summary>
    [TestMethod]
    public void TestMethod1()
    {
        const string baseline = "2.E";
        StringComparer comp = new MyStringComparer();

        Assert.IsTrue("2.F".IsSameOrAfter(baseline, comp));     // later second part
        Assert.IsFalse("2.C".IsSameOrAfter(baseline, comp));    // earlier second part
        Assert.IsFalse("1.F".IsSameOrAfter(baseline, comp));    // earlier first part wins
        Assert.IsTrue("3.A".IsSameOrAfter(baseline, comp));     // later first part wins
        Assert.IsTrue(baseline.IsSameOrAfter(baseline, comp));  // same version
    }

    /// <summary>
    /// Versions with a differing number of parts compared against "2.E.1"
    /// through the convenience overload that supplies its own comparer.
    /// </summary>
    [TestMethod]
    public void TestMethod2()
    {
        const string baseline = "2.E.1";

        Assert.IsFalse("2.E".DotDelimitedIsSameOrAfter(baseline));    // fewer parts, shared parts equal
        Assert.IsTrue("2.E.2".DotDelimitedIsSameOrAfter(baseline));   // later third part
        Assert.IsTrue("2.F".DotDelimitedIsSameOrAfter(baseline));     // later second part
        Assert.IsFalse("2.D.3".DotDelimitedIsSameOrAfter(baseline));  // earlier second part
        Assert.IsTrue("3.A".DotDelimitedIsSameOrAfter(baseline));     // later first part
        Assert.IsTrue(baseline.DotDelimitedIsSameOrAfter(baseline));  // same version
    }
}
/// <summary>
/// Extension helpers answering "is this version the same as, or later than, that one?".
/// </summary>
public static class stringExtensions
{
    /// <summary>
    /// Compares two dot-delimited versions using <see cref="MyStringComparer"/>.
    /// </summary>
    /// <param name="a">The version being tested.</param>
    /// <param name="b">The version to test against.</param>
    /// <returns><c>true</c> when <paramref name="a"/> is equal to or later than <paramref name="b"/>.</returns>
    public static bool DotDelimitedIsSameOrAfter(this string a, string b)
    {
        return a.IsSameOrAfter(b, new MyStringComparer());
    }

    /// <summary>
    /// Tests whether <paramref name="a"/> sorts at or after <paramref name="b"/> under the given comparer.
    /// </summary>
    /// <param name="a">The value being tested.</param>
    /// <param name="b">The value to test against.</param>
    /// <param name="comp">Comparer that defines the ordering.</param>
    /// <returns><c>true</c> when <c>comp.Compare(a, b)</c> is zero or positive.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="comp"/> is null.</exception>
    public static bool IsSameOrAfter(this string a, string b, StringComparer comp)
    {
        if (comp == null)
        {
            throw new ArgumentNullException("comp");
        }

        // Standard comparer convention: a positive result means a sorts after b.
        return comp.Compare(a, b) >= 0;
    }
}

/// <summary>
/// Orders dot-delimited strings ("x.y", "x.y.z", ...) part by part.
/// </summary>
/// <remarks>
/// Parts are compared ordinally, character by character, so multi-digit numeric
/// parts are not ordered numerically ("10" sorts before "9").
/// </remarks>
public class MyStringComparer : StringComparer
{
    /// <summary>
    /// Compares two dot-delimited strings.
    /// </summary>
    /// <param name="x">The first string.</param>
    /// <param name="y">The second string.</param>
    /// <returns>
    /// Negative when <paramref name="x"/> sorts before <paramref name="y"/>, zero when they
    /// are equal, positive when it sorts after. Null sorts before any non-null string.
    /// </returns>
    public override int Compare(string x, string y)
    {
        if (ReferenceEquals(x, y))
        {
            return 0;
        }
        if (x == null)
        {
            return -1;
        }
        if (y == null)
        {
            return 1;
        }

        var partsX = x.Split('.');
        var partsY = y.Split('.');
        int shared = Math.Min(partsX.Length, partsY.Length);
        for (int i = 0; i < shared; i++)
        {
            int partComp = string.CompareOrdinal(partsX[i], partsY[i]);
            if (partComp != 0)
            {
                return partComp;
            }
        }

        // All shared parts match: the string with extra parts is the later one ("2.E.1" > "2.E").
        return partsX.Length.CompareTo(partsY.Length);
    }

    /// <summary>Ordinal equality; two nulls are equal.</summary>
    public override bool Equals(string x, string y)
    {
        return string.Equals(x, y);
    }

    /// <summary>Returns the hash code of <paramref name="obj"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
    public override int GetHashCode(string obj)
    {
        if (obj == null)
        {
            throw new ArgumentNullException("obj");
        }
        return obj.GetHashCode();
    }
}
}
I have wrapped the calls in an extension method purely to make it a bit more readable.

Since I was needing just that (and a bit more), I've written the following comparer which compares two alphanumeric version strings:
/// <summary>
/// Compares two alphanumeric version numbers.
/// </summary>
/// <remarks>
/// The version strings are split on '.' and compared part by part with
/// <see cref="AlphanumericComparer"/>. When every shared part compares equal,
/// the version with more parts is the greater one.
/// </remarks>
public class AlphanumericVersionComparer : IComparer<string>
{
    /// <summary>
    /// Compares two alphanumeric version numbers and returns a value
    /// indicating whether one is less than, equal to, or greater than the other.
    /// </summary>
    /// <param name="x">The first alphanumeric version number to compare.</param>
    /// <param name="y">The second alphanumeric version number to compare.</param>
    /// <returns>A signed integer that indicates the relative values of x and y.</returns>
    /// <exception cref="ArgumentNullException">x or y is null.</exception>
    public int Compare(string x, string y)
    {
        // Validate parameters
        if (x == null) throw new ArgumentNullException("x");
        if (y == null) throw new ArgumentNullException("y");

        // Fast path for identical strings
        if (x == y)
        {
            return 0;
        }

        string[] xParts = x.Split('.');
        string[] yParts = y.Split('.');

        // Compare the parts both versions have
        AlphanumericComparer partComparer = new AlphanumericComparer();
        int sharedParts = Math.Min(xParts.Length, yParts.Length);
        for (int i = 0; i < sharedParts; i++)
        {
            int result = partComparer.Compare(xParts[i], yParts[i]);
            if (result != 0)
            {
                return result;
            }
        }

        // Every shared part is equal, so the version with extra parts is greater.
        // With the same number of parts this is 0: textually different strings can
        // still be part-wise equal (e.g. "1.1a01" and "1.01a1"), so reaching this
        // point is legitimate and must not trip a debug assertion.
        return xParts.Length.CompareTo(yParts.Length);
    }
}
/// <summary>
/// Compares two alphanumeric strings.
/// </summary>
/// <remarks>
/// Each string is cut into alternating runs of digits and non-digits. Digit runs are
/// compared by numeric value, everything else as text.
/// See http://snipd.net/alphanumericnatural-sorting-in-c-using-icomparer
/// </remarks>
public class AlphanumericComparer : IComparer<string>
{
    /// <summary>
    /// Compares two alphanumerics and returns a value
    /// indicating whether one is less than, equal to, or greater than the other.
    /// </summary>
    /// <param name="x">The first alphanumeric to compare.</param>
    /// <param name="y">The second alphanumeric to compare.</param>
    /// <returns>
    /// A signed integer that indicates the relative values of x and y.
    /// Null sorts before any non-null string.
    /// </returns>
    public int Compare(string x, string y)
    {
        if (ReferenceEquals(x, y))
        {
            return 0;
        }
        if (x == null)
        {
            return -1;
        }
        if (y == null)
        {
            return 1;
        }

        int marker1 = 0;
        int marker2 = 0;

        // Walk both strings chunk by chunk until one of them runs out.
        while (marker1 < x.Length && marker2 < y.Length)
        {
            string chunk1 = ReadChunk(x, ref marker1);
            string chunk2 = ReadChunk(y, ref marker2);

            int result;
            if (char.IsDigit(chunk1[0]) && char.IsDigit(chunk2[0]))
            {
                // Two numbers: compare by value.
                result = CompareDigitRuns(chunk1, chunk2);
            }
            else
            {
                // At least one chunk is text: compare alphabetically.
                result = chunk1.CompareTo(chunk2);
            }

            if (result != 0)
            {
                return result;
            }
        }

        // All compared chunks were equal; fall back to the overall length.
        return x.Length - y.Length;
    }

    /// <summary>
    /// Reads the run of characters starting at <paramref name="marker"/> that are all digits
    /// or all non-digits, and advances <paramref name="marker"/> past it.
    /// </summary>
    private static string ReadChunk(string text, ref int marker)
    {
        int start = marker;
        bool digits = char.IsDigit(text[start]);
        do
        {
            marker++;
        } while (marker < text.Length && char.IsDigit(text[marker]) == digits);

        return text.Substring(start, marker - start);
    }

    /// <summary>
    /// Compares two digit runs by numeric value without parsing them, so runs of any
    /// length are supported: ignoring leading zeros, the longer run is the larger number,
    /// and runs of equal length order the same way as their characters.
    /// </summary>
    private static int CompareDigitRuns(string left, string right)
    {
        string a = left.TrimStart('0');
        string b = right.TrimStart('0');
        if (a.Length != b.Length)
        {
            return a.Length.CompareTo(b.Length);
        }
        return Math.Sign(string.CompareOrdinal(a, b));
    }
}
You can play with the fiddle here: https://dotnetfiddle.net/28iius.
All credits to http://snipd.net/alphanumericnatural-sorting-in-c-using-icomparer for the alphanumeric comparison.

Related

How to take X amount of questions, that sum up Y amount of difficulty

I have a table
Questions -> Question(string), Difficulty (int, 1-10)
I need to create a method, that as the title mentions, takes X amount of questions, whose difficulty should sum up Y.
For example:
getQuestions(2,10) -> Question1 (diff: 4), Question2 (diff: 6)
getQuestions(3,15) -> Question3 (diff: 5), Question4 (diff: 5), Question5 (diff: 5)
How can I achieve something like this with LINQ?
Here's one way to do it, using a modified version of the recursive solution found here: Finding all possible combinations of numbers to reach a given sum
First, a public method that will do some quick validation and then call a recursive method to get the results:
/// <summary>
/// Gets lists of numQuestions length of all combinations
/// of questions whose difficulties add up to sumDifficulty
/// </summary>
/// <param name="questions">The list of questions to search</param>
/// <param name="numQuestions">The number of questions required</param>
/// <param name="sumDifficulty">The amount that the difficulties should sum to</param>
/// <returns>
/// One list per matching combination; empty when the arguments cannot be satisfied.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="questions"/> is null.</exception>
public static List<List<Question>> GetQuestions(List<Question> questions,
    int numQuestions, int sumDifficulty)
{
    if (questions == null) throw new ArgumentNullException("questions");

    var results = new List<List<Question>>();

    // Fail fast: the request is impossible for any set of questions
    if (numQuestions < 1 ||
        numQuestions > questions.Count ||
        sumDifficulty < numQuestions * Question.MinDifficulty ||
        sumDifficulty > numQuestions * Question.MaxDifficulty)
    {
        return results;
    }

    // If we only need single questions, no need to do any recursion
    if (numQuestions == 1)
    {
        results.AddRange(questions.Where(q => q.Difficulty == sumDifficulty)
            .Select(q => new List<Question> {q}));
        return results;
    }

    // A question can only take part if the other (numQuestions - 1) questions,
    // each at the minimum difficulty, still leave room for it.
    int maxUsableDifficulty = sumDifficulty - (numQuestions - 1) * Question.MinDifficulty;
    var candidateQuestions = questions
        .Where(q => q.Difficulty <= maxUsableDifficulty)
        .ToList();

    // Not enough usable questions left to form even one combination
    if (candidateQuestions.Count < numQuestions)
    {
        return results;
    }

    GetSumsRecursively(candidateQuestions, sumDifficulty, new List<Question>(),
        numQuestions, results);
    return results;
}
And then the recursive method that does the heavy lifting:
/// <summary>
/// Extends <paramref name="candidates"/> with every possible pick from
/// <paramref name="questions"/> and records each combination that has exactly
/// <paramref name="numQuestions"/> entries summing to <paramref name="sumDifficulty"/>.
/// </summary>
/// <param name="questions">Questions still available to be picked, in order.</param>
/// <param name="sumDifficulty">The amount that the difficulties should sum to.</param>
/// <param name="candidates">Questions picked so far.</param>
/// <param name="numQuestions">The number of questions required.</param>
/// <param name="results">Receives each matching combination.</param>
private static void GetSumsRecursively(IReadOnlyList<Question> questions,
    int sumDifficulty, List<Question> candidates, int numQuestions,
    ICollection<List<Question>> results)
{
    int candidateSum = candidates.Sum(x => x.Difficulty);
    if (candidateSum == sumDifficulty && candidates.Count == numQuestions)
    {
        results.Add(candidates);
        return;
    }

    // Dead ends: the target sum is already reached or exceeded, or the combination is
    // already full. Only combinations of exactly numQuestions entries are ever recorded,
    // so adding more questions to a full one can never produce a result.
    if (candidateSum >= sumDifficulty || candidates.Count >= numQuestions)
    {
        return;
    }

    int stillNeeded = numQuestions - candidates.Count;
    for (int i = 0; i < questions.Count; i++)
    {
        // Too few questions left from here on to fill the combination
        if (questions.Count - i < stillNeeded)
        {
            break;
        }

        // Only questions after the current one may follow it, which avoids
        // producing the same combination in a different order.
        var remaining = questions.Skip(i + 1).ToList();
        var extended = new List<Question>(candidates) {questions[i]};
        GetSumsRecursively(remaining, sumDifficulty, extended, numQuestions, results);
    }
}
Here's an example usage:
/// <summary>
/// Demo: builds ten fake questions and prints every combination of three
/// whose difficulties add up to fifteen.
/// </summary>
public static void Main()
{
    const int numberOfQuestions = 3;
    const int sumOfDifficulty = 15;

    // No real table available here, so fake one with a list of objects
    var questions = Enumerable.Range(1, 10)
        .Select(n => new Question
        {
            Difficulty = n % 10 + 1,
            QuestionString = "Question #" + n
        })
        .ToList();

    // Print each combination as "a + b + c = total" to verify the results
    foreach (var combination in GetQuestions(questions, numberOfQuestions, sumOfDifficulty))
    {
        var difficulties = combination.Select(r => r.Difficulty);
        Console.WriteLine("{0} = {1}", string.Join(" + ", difficulties), sumOfDifficulty);
    }
}
And just so you have everything to make this work, here's my Question class used to fake your table:
/// <summary>
/// A question with a difficulty rating, used to stand in for a database table.
/// </summary>
internal class Question
{
    /// <summary>Lowest difficulty a question can have.</summary>
    public const int MinDifficulty = 1;

    /// <summary>Highest difficulty a question can have.</summary>
    public const int MaxDifficulty = 10;

    private int _difficulty;

    /// <summary>
    /// Difficulty rating. Assigned values outside
    /// [<see cref="MinDifficulty"/>, <see cref="MaxDifficulty"/>] are clamped to the nearest bound.
    /// </summary>
    public int Difficulty
    {
        get { return _difficulty; }
        set { _difficulty = Math.Min(MaxDifficulty, Math.Max(MinDifficulty, value)); }
    }

    /// <summary>The question text.</summary>
    public string QuestionString { get; set; }
}

Detecting Filename Patterns for Creating RegEx

When using a string to define a RegEx, I'd like to know if there is a way to get my code to recognize a pattern in the files contained within a directory.
The goal is to rename these files using our naming conventions, so I'm writing something to try to create the expression to use in RegEx.
I've started something here, but I don't think it is the best, and I'm not sure how to fill in the "{0}" portion of my RegEx expression.
private Regex m_regex;
// Scans the files directly inside `path` whose extension appears in `extensions` and
// tries to derive a shared name pattern of the form prefix + "{0}" + suffix.
// Returns null when no file matches; the first matching file's name seeds `result`.
// NOTE(review): `endPos` is never assigned in this method, so the `0 < endPos` test below
// is always false and the "{0}" assignment cannot run; as written, the method returns the
// first matching file's name unchanged.
public string DirPattern(string path, string[] extensions) {
string result = null;
int endPos = 0;
int resLen = 0;
int startLen = 0;
var dir = new DirectoryInfo(path);
foreach (var file in dir.GetFiles()) {
// Entries of `extensions` are compared against file.Extension as-is.
if (extensions.Contains(file.Extension)) {
if (!String.IsNullOrEmpty(result)) {
int sL = 0;
int fileLen = file.Name.Length;
string one = null;
// Forward scan over the positions both names have. Mismatching positions are skipped
// rather than ending the scan: the first matching position records `one`, and the
// second matching position sets sL and breaks.
// NOTE(review): presumably meant to stop at the first mismatch (common prefix) - confirm.
for (int i = 0; i < resLen && i < fileLen; i++) {
if (result[i] == file.Name[i]) {
sL = i + 1;
if (String.IsNullOrEmpty(one)) {
one = file.Name;
} else {
break;
}
}
}
if (!String.IsNullOrEmpty(one)) {
int eP = 0;
// oneLen is not read anywhere below.
int oneLen = one.Length;
// Backward scan from the end of this file's name while characters at the SAME index match.
// NOTE(review): result[i] is read with i starting at fileLen - 1 and no check against
// resLen, so a name longer than `result` indexes past the end of `result`. The names
// are also not aligned by their ends, which a suffix comparison would need - confirm.
for (int i = fileLen - 1; -1 < i; i--) {
if (result[i] == file.Name[i]) {
eP = i - 1;
} else {
break;
}
}
// Only when both the start and end positions repeat a previously seen value is the
// pattern written; otherwise the current start length is remembered for the next file.
if ((0 < endPos) && (eP == endPos)) {
if ((0 < startLen) && (sL == startLen)) {
result = one.Substring(0, startLen) + "{0}" + one.Substring(endPos);
} else if (0 < sL) {
startLen = sL;
}
} else if (0 < sL) {
startLen = sL;
}
}
} else {
// First matching file: its name is the reference for all later comparisons.
result = file.Name;
resLen = result.Length;
}
}
}
return result;
}
/// <summary>
/// Builds <c>m_regex</c> from the pattern that <c>DirPattern</c> derives for the directory.
/// </summary>
/// <param name="path">Directory whose files are examined.</param>
/// <param name="extensions">File extensions to include.</param>
/// <returns><c>true</c> when a pattern was found and the regex was created; otherwise <c>false</c>.</returns>
public bool GenerateRexEx(string path, string[] extensions) {
    var pattern = DirPattern(path, extensions);
    if (String.IsNullOrEmpty(pattern)) {
        return false;
    }

    // The pattern is made of literal file-name text, so it has to be escaped: characters
    // such as '.', '(' or '+' would otherwise be read as regex syntax (or make the
    // constructor throw), and a raw "{0}" would be parsed as a "repeat zero times" quantifier.
    // After escaping, the placeholder is swapped for a group that captures the varying part.
    var escaped = Regex.Escape(pattern).Replace(Regex.Escape("{0}"), "(.+)");
    m_regex = new Regex(escaped);
    return true;
}
Here is an example of a list of files that would be most like our company files (which I am not allowed to post):
UPDATE:
The goal is to take files with names like this:
FOLDER_PATTERN_1 + MixedContent + FOLDER_PATTERN_2
and rename them using our format:
OUR_PATTERN_1 + MixedContent + OUR_PATTERN_2
That way, our software will be able to search the files more efficiently.
I think that in your case you just need to find the number of characters in the prefix pattern and in the postfix pattern. Then you can simply replace that many characters with your own pattern. I wrote some simple code, which I have tested and which works; you can use the same approach. There is certainly room to improve it, but I hope it is enough to answer your question.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace ConsoleApplication1
{
/// <summary>
/// Demo: detects the prefix and postfix shared by a set of file names and swaps
/// them for our own naming pattern.
/// </summary>
static class Program
{
    static void Main()
    {
        var inputFilenames = new string[]
        {
            "mtn_flint501-muxed",
            "mtn_flint502-muxed",
            "mtn_flint503-muxed",
            "mtn_flint504-muxed",
            "mtn_flint505-muxed",
            "mtn_flint506-muxed",
            "mtn_flint507-muxed",
            "mtn_flint508-muxed",
            "mtn_flint509-muxed",
            "mtn_flint510-muxed",
            "mtn_flint511-muxed",
            "mtn_flint512-muxed",
        };
        var replacedFilenames = ReplaceFileNames(inputFilenames);
        for (int i = 0; i < inputFilenames.Length; i++)
        {
            Console.WriteLine("{0} >> {1}", inputFilenames[i], replacedFilenames[i]);
        }
        Console.ReadKey();
    }

    private const string OurPrefixPattern = "Prefix_";
    private const string OurPostfixPattern = "_Postfix";

    /// <summary>
    /// Finds the prefix and postfix common to all file names and replaces them with our pattern.
    /// </summary>
    /// <param name="fileNames">The file names to convert.</param>
    /// <returns>
    /// The converted names, in input order. The result is empty for null or empty input, and
    /// the names are returned unchanged when no name differs from the first one (there is
    /// then nothing to derive a pattern from).
    /// </returns>
    public static string[] ReplaceFileNames(params string[] fileNames)
    {
        if (fileNames == null || fileNames.Length == 0)
        {
            return new string[0];
        }

        // The first file name is the reference every other name is compared with.
        var referenceFilename = fileNames[0];
        var reversedReferenceFilename = referenceFilename.ReverseString();

        // Number of leading / trailing characters shared by ALL names.
        var prefixCount = int.MaxValue;
        var postfixCount = int.MaxValue;
        var shortestLength = int.MaxValue;
        foreach (var filename in fileNames)
        {
            shortestLength = Math.Min(shortestLength, filename.Length);
            if (filename == referenceFilename)
            {
                continue;
            }

            prefixCount = Math.Min(prefixCount,
                referenceFilename.GetFirstDifferentIndexWith(filename));

            // The postfix is found the same way, on the reversed strings.
            postfixCount = Math.Min(postfixCount,
                reversedReferenceFilename.GetFirstDifferentIndexWith(filename.ReverseString()));
        }

        if (prefixCount == int.MaxValue)
        {
            // Every name equals the reference, so no pattern can be detected.
            return (string[])fileNames.Clone();
        }

        // The common prefix and postfix can overlap on short names ("aba" vs "abba");
        // the prefix takes precedence so that no character is removed twice.
        postfixCount = Math.Min(postfixCount, shortestLength - prefixCount);

        var result = new string[fileNames.Length];
        for (int i = 0; i < fileNames.Length; i++)
        {
            var name = fileNames[i];
            var middle = name.Substring(prefixCount, name.Length - prefixCount - postfixCount);
            result[i] = OurPrefixPattern + middle + OurPostfixPattern;
        }
        return result;
    }

    /// <summary>
    /// Gets the first index at which the two strings have a different character.
    /// </summary>
    /// <param name="value">The first string.</param>
    /// <param name="stringToCompare">The second string.</param>
    /// <returns>
    /// The number of leading characters the strings share; at most the length of the shorter one.
    /// </returns>
    private static int GetFirstDifferentIndexWith(this string value, string stringToCompare)
    {
        return value.Zip(stringToCompare, (c1, c2) => c1 == c2).TakeWhile(b => b).Count();
    }

    /// <summary>
    /// Reverses the given string.
    /// </summary>
    /// <param name="value">String which should be reversed</param>
    /// <returns>Reversed string</returns>
    private static string ReverseString(this string value)
    {
        char[] charArray = value.ToCharArray();
        Array.Reverse(charArray);
        return new string(charArray);
    }
}
}
The console output looks like this
mtn_flint501-muxed >> Prefix_01_Postfix
mtn_flint502-muxed >> Prefix_02_Postfix
mtn_flint503-muxed >> Prefix_03_Postfix
mtn_flint504-muxed >> Prefix_04_Postfix
mtn_flint505-muxed >> Prefix_05_Postfix
mtn_flint506-muxed >> Prefix_06_Postfix
mtn_flint507-muxed >> Prefix_07_Postfix
mtn_flint508-muxed >> Prefix_08_Postfix
mtn_flint509-muxed >> Prefix_09_Postfix
mtn_flint510-muxed >> Prefix_10_Postfix
mtn_flint511-muxed >> Prefix_11_Postfix
mtn_flint512-muxed >> Prefix_12_Postfix

How to sort objects by a string field that contains numbers

i have a class:
/// <summary>
/// An essay identified by a numeric ID and a name.
/// </summary>
public class Essay
{
    /// <summary>Numeric identifier of the essay.</summary>
    public int ID { get; set; }

    /// <summary>Name of the essay.</summary>
    public string Name { get; set; }
}
and list of Essay type
List<Essay> essays=new List<Essay>();
on the name property contains numbers and letters.
i want to sort the list by the name property
for example:
essays=
{1,"ccccc"},
{2,"aaaa"},
{3,"bbbb"},
{4,"10"},
{5,"1"},
{6,"2"},
{7,"1a"}
i want to sort:
essays=
{2,"aaaa"},
{3,"bbbb"},
{1,"ccccc"},
{5,"1"},
{7,"1a"},
{6,"2"},
{4,"10"}
how i do it?
thank to all.
The name mixes letters and digits, which leads to awkward results if you simply sort alphabetically.
What you seem to want is called natural sorting, where the numbers within the text are recognized as numbers and that part of the sort is applied on a numeric basis.
A few implementations of this algorithm can be found on Jeffs webpage.
The one i most prefer is the one from Dave:
/*
* The Alphanum Algorithm is an improved sorting algorithm for strings
* containing numbers. Instead of sorting numbers in ASCII order like
* a standard sort, this algorithm sorts numbers in numeric order.
*
* The Alphanum Algorithm is discussed at http://www.DaveKoelle.com
*
* Based on the Java implementation of Dave Koelle's Alphanum algorithm.
* Contributed by Jonathan Ruckwood <jonathan.ruckwood#gmail.com>
*
* Adapted by Dominik Hurnaus <dominik.hurnaus#gmail.com> to
* - correctly sort words where one word starts with another word
* - have slightly better performance
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
using System;
using System.Collections;
using System.Text;
/*
* Please compare against the latest Java version at http://www.DaveKoelle.com
* to see the most recent modifications
*/
namespace AlphanumComparator
{
/// <summary>
/// Compares strings "naturally": runs of digits are ordered by numeric value,
/// everything else as text.
/// </summary>
public class AlphanumComparator : IComparer
{
    /// <summary>
    /// True when <paramref name="ch"/> belongs to the same kind of chunk (digits or
    /// non-digits) as <paramref name="otherCh"/>.
    /// </summary>
    private static bool InChunk(char ch, char otherCh)
    {
        return char.IsDigit(ch) == char.IsDigit(otherCh);
    }

    /// <summary>
    /// Compares two strings chunk by chunk.
    /// </summary>
    /// <param name="x">The first string.</param>
    /// <param name="y">The second string.</param>
    /// <returns>
    /// Negative, zero or positive when x sorts before, equal to or after y.
    /// Returns 0 when either argument is null or not a string.
    /// </returns>
    public int Compare(object x, object y)
    {
        string s1 = x as string;
        string s2 = y as string;
        if (s1 == null || s2 == null)
        {
            return 0;
        }

        int thisMarker = 0;
        int thatMarker = 0;
        while ((thisMarker < s1.Length) || (thatMarker < s2.Length))
        {
            // The string that runs out first sorts first.
            if (thisMarker >= s1.Length)
            {
                return -1;
            }
            if (thatMarker >= s2.Length)
            {
                return 1;
            }

            string thisChunk = NextChunk(s1, ref thisMarker);
            string thatChunk = NextChunk(s2, ref thatMarker);

            int result;
            if (char.IsDigit(thisChunk[0]) && char.IsDigit(thatChunk[0]))
            {
                // If both chunks contain numeric characters, sort them numerically
                result = CompareNumericChunks(thisChunk, thatChunk);
            }
            else
            {
                result = thisChunk.CompareTo(thatChunk);
            }

            if (result != 0)
            {
                return result;
            }
        }
        return 0;
    }

    /// <summary>
    /// Reads the chunk starting at <paramref name="marker"/> and advances the marker past it.
    /// </summary>
    private static string NextChunk(string text, ref int marker)
    {
        int start = marker;
        do
        {
            marker++;
        } while (marker < text.Length && InChunk(text[marker], text[start]));

        return text.Substring(start, marker - start);
    }

    /// <summary>
    /// Orders two digit runs by numeric value without converting them to an integer, so
    /// runs longer than an Int32 can hold do not throw: ignoring leading zeros, the longer
    /// run is the larger number, and equally long runs order like their characters.
    /// </summary>
    private static int CompareNumericChunks(string left, string right)
    {
        string a = left.TrimStart('0');
        string b = right.TrimStart('0');
        if (a.Length != b.Length)
        {
            return a.Length < b.Length ? -1 : 1;
        }
        return Math.Sign(string.CompareOrdinal(a, b));
    }
}
}
There are several elements to the answer.
The first part is being able to in-place sort a List using Sort() and a lambda comparison method. That's solved by using an extension method for IList and a helper "ComparisonDelegator" class. Combining those, it's possible to pass a lambda to List.Sort().
The second part has been addressed in another post here (which I have upvoted) and the code from which I have shamelessly pasted into the AlphanumComparator class in this answer.
(As a side note, I should point out that all the Linq examples posted elsewhere in this thread make a COPY of the list. This is fine for short lists, but if you have a long list it can cause performance problems. The solution presented here does NOT make a copy of the list.)
Putting it all together, we get the following code, which outputs:
ID=2, Name=aaaa
ID=3, Name=bbbb
ID=1, Name=ccccc
ID=5, Name=1
ID=7, Name=1a
ID=6, Name=2
ID=4, Name=10
And the full code sample (compilable as a console application):
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace Demo
{
/// <summary>
/// Demo: sorts a list of essays in place by name and prints the result.
/// </summary>
public static class Program
{
    public static void Main()
    {
        var essays = new List<Essay>
        {
            new Essay { ID = 1, Name = "ccccc" },
            new Essay { ID = 2, Name = "aaaa" },
            new Essay { ID = 3, Name = "bbbb" },
            new Essay { ID = 4, Name = "10" },
            new Essay { ID = 5, Name = "1" },
            new Essay { ID = 6, Name = "2" },
            new Essay { ID = 7, Name = "1a" }
        };

        // Order by name using the natural-sort comparer
        var nameOrder = new AlphanumComparator();
        essays.Sort((left, right) => nameOrder.Compare(left.Name, right.Name));

        essays.ForEach(item => Console.WriteLine("ID={0}, Name={1}", item.ID, item.Name));
    }
}
/// <summary>
/// An essay identified by a numeric ID and a name.
/// </summary>
public class Essay
{
    /// <summary>Numeric identifier of the essay.</summary>
    public int ID { get; set; }

    /// <summary>Name of the essay; this is the value the demo sorts by.</summary>
    public string Name { get; set; }
}
/// <summary>Extensions for IList{T}</summary>
public static class ListExt
{
    /// <summary> Sorts an IList{T} in place. </summary>
    /// <param name="list">The list to sort.</param>
    /// <param name="comparison">Defines the order of two elements.</param>
    /// <exception cref="ArgumentNullException">list or comparison is null.</exception>
    public static void Sort<T>(this IList<T> list, Comparison<T> comparison)
    {
        if (list == null) throw new ArgumentNullException("list");
        if (comparison == null) throw new ArgumentNullException("comparison");

        // List<T> and arrays can sort themselves directly.
        var concreteList = list as List<T>;
        if (concreteList != null)
        {
            concreteList.Sort(comparison);
            return;
        }

        var array = list as T[];
        if (array != null)
        {
            Array.Sort(array, comparison);
            return;
        }

        // Any other IList<T>: it is not required to implement the non-generic IList
        // (ArraySegment<T> does not, for instance), so sort a copy and write it back
        // through the generic indexer instead of casting.
        var buffer = new T[list.Count];
        list.CopyTo(buffer, 0);
        Array.Sort(buffer, comparison);
        for (int i = 0; i < buffer.Length; i++)
        {
            list[i] = buffer[i];
        }
    }
}
/// <summary>
/// Provides a mechanism for easily converting a Comparison&lt;&gt; delegate (or lambda) to an IComparer&lt;&gt;.
/// This can be used for List.BinarySearch(), for example.
/// </summary>
/// <typeparam name="T">The type of items to be compared.</typeparam>
public sealed class ComparisonDelegator<T>: IComparer<T>, IComparer
{
    /// <summary>Used to store the Comparison delegate.</summary>
    private readonly Comparison<T> _comparison;

    /// <summary>Create from a Comparison&lt;&gt; delegate.</summary>
    /// <param name="comparison">A Comparison&lt;&gt; delegate.</param>
    /// <exception cref="ArgumentNullException">comparison is null.</exception>
    public ComparisonDelegator(Comparison<T> comparison)
    {
        // Fail here rather than with a NullReferenceException in the middle of a sort.
        if (comparison == null)
        {
            throw new ArgumentNullException("comparison");
        }
        _comparison = comparison;
    }

    /// <summary>Implements the IComparer&lt;T&gt;.Compare() method.</summary>
    public int Compare(T x, T y)
    {
        return _comparison(x, y);
    }

    /// <summary>
    /// Implements the non-generic IComparer.Compare() method; both arguments are cast to T.
    /// </summary>
    public int Compare(object x, object y)
    {
        return _comparison((T)x, (T)y);
    }
}
/// <summary>
/// Special class to sort strings "naturally",
/// but to place non-numeric items *before* numeric items.
/// </summary>
public class AlphanumComparator : IComparer
{
    /// <summary>
    /// True when <paramref name="ch"/> belongs to the same kind of chunk (digits or
    /// non-digits) as <paramref name="otherCh"/>.
    /// </summary>
    private static bool InChunk(char ch, char otherCh)
    {
        return char.IsDigit(ch) == char.IsDigit(otherCh);
    }

    /// <summary>
    /// Compares two strings chunk by chunk.
    /// </summary>
    /// <param name="x">The first string.</param>
    /// <param name="y">The second string.</param>
    /// <returns>
    /// Negative, zero or positive when x sorts before, equal to or after y.
    /// Returns 0 when either argument is null or not a string.
    /// </returns>
    public int Compare(object x, object y)
    {
        string s1 = x as string;
        string s2 = y as string;
        if (s1 == null || s2 == null)
        {
            return 0;
        }

        int thisMarker = 0;
        int thatMarker = 0;
        while ((thisMarker < s1.Length) || (thatMarker < s2.Length))
        {
            // The string that runs out first sorts first.
            if (thisMarker >= s1.Length)
            {
                return -1;
            }
            if (thatMarker >= s2.Length)
            {
                return 1;
            }

            string thisChunk = NextChunk(s1, ref thisMarker);
            string thatChunk = NextChunk(s2, ref thatMarker);
            bool thisIsNumeric = char.IsDigit(thisChunk[0]);
            bool thatIsNumeric = char.IsDigit(thatChunk[0]);

            int result;
            if (thisIsNumeric && thatIsNumeric)
            {
                // If both chunks contain numeric characters, sort them numerically
                result = CompareNumericChunks(thisChunk, thatChunk);
            }
            else if (thisIsNumeric)
            {
                return 1; // Ensure that non-numeric sorts before numeric.
            }
            else if (thatIsNumeric)
            {
                return -1; // Ensure that non-numeric sorts before numeric.
            }
            else
            {
                result = thisChunk.CompareTo(thatChunk);
            }

            if (result != 0)
            {
                return result;
            }
        }
        return 0;
    }

    /// <summary>
    /// Reads the chunk starting at <paramref name="marker"/> and advances the marker past it.
    /// </summary>
    private static string NextChunk(string text, ref int marker)
    {
        int start = marker;
        do
        {
            marker++;
        } while (marker < text.Length && InChunk(text[marker], text[start]));

        return text.Substring(start, marker - start);
    }

    /// <summary>
    /// Orders two digit runs by numeric value without converting them to an integer, so
    /// runs longer than an Int32 can hold do not throw: ignoring leading zeros, the longer
    /// run is the larger number, and equally long runs order like their characters.
    /// </summary>
    private static int CompareNumericChunks(string left, string right)
    {
        string a = left.TrimStart('0');
        string b = right.TrimStart('0');
        if (a.Length != b.Length)
        {
            return a.Length < b.Length ? -1 : 1;
        }
        return Math.Sign(string.CompareOrdinal(a, b));
    }
}
}
You could replace the numeric characters with characters that have higher character codes, sort with OrderBy, and then revert the replacement.
// Sample data
List<Essay> essays = new List<Essay>
{
    new Essay { ID = 1, Name = "ccccc" },
    new Essay { ID = 2, Name = "aaaa" },
    new Essay { ID = 3, Name = "bbbb" },
    new Essay { ID = 4, Name = "10" },
    new Essay { ID = 5, Name = "1" },
    new Essay { ID = 6, Name = "2" },
    new Essay { ID = 7, Name = "1a" }
};

// Swap the digits in every name for their stand-in characters, sort by name,
// then swap the digits back in the sorted list.
foreach (var essay in essays)
{
    Replace(essay);
}
var result = essays.OrderBy(q => q.Name).ToList();
foreach (var essay in result)
{
    Revert(essay);
}
Related Functions:
/// <summary>
/// Replaces each digit '0'..'9' in the essay's name with the character whose
/// code is 240 plus the digit's value.
/// </summary>
/// <param name="x">The essay whose name is rewritten in place.</param>
public void Replace(Essay x)
{
    for (int digit = 0; digit <= 9; digit++)
    {
        x.Name = x.Name.Replace((char)('0' + digit), (char)(240 + digit));
    }
}
/// <summary>
/// Replaces each character with code 240..249 in the essay's name with the
/// digit '0'..'9' it stands for.
/// </summary>
/// <param name="x">The essay whose name is rewritten in place.</param>
public void Revert(Essay x)
{
    for (int digit = 0; digit <= 9; digit++)
    {
        x.Name = x.Name.Replace((char)(240 + digit), (char)('0' + digit));
    }
}
I have created an IComparer implementation and use OrderBy to sort the list. I use a regex to capture the digits at the start of the Name and if they exist use those to order the items, if not then just compare the names using a string comparison. A name starting with a number always comes after all names starting with letters. It gives the output you have requested.
/// <summary>
/// Orders essay names so that names starting with letters come first, followed by
/// names starting with a number in numeric order.
/// </summary>
public class EssayNameComparer : IComparer<string>
{
    // Anchored, so only digits at the very start of the name count as its number.
    private static readonly Regex LeadingDigits = new Regex("^\\d+");

    /// <summary>
    /// Compares two names.
    /// </summary>
    /// <param name="a">The first name.</param>
    /// <param name="b">The second name.</param>
    /// <returns>
    /// Negative, zero or positive when a sorts before, equal to or after b.
    /// A name starting with a number always sorts after one that does not; two such names
    /// are ordered by the number and then by the text that follows it.
    /// </returns>
    public int Compare(string a, string b)
    {
        Match matcha = LeadingDigits.Match(a);
        Match matchb = LeadingDigits.Match(b);

        if (matcha.Success && matchb.Success)
        {
            int byNumber = CompareDigitRuns(matcha.Value, matchb.Value);
            if (byNumber != 0)
            {
                return byNumber;
            }

            // Same number: the rest of the name decides, so "1" comes before "1a"
            // regardless of the order in which the items were supplied.
            return string.Compare(a.Substring(matcha.Length), b.Substring(matchb.Length));
        }
        if (matcha.Success)
        {
            return 1;
        }
        if (matchb.Success)
        {
            return -1;
        }
        return string.Compare(a, b);
    }

    /// <summary>
    /// Orders two digit runs by numeric value without parsing them, so numbers too large
    /// for an Int32 do not throw: ignoring leading zeros, the longer run is the larger
    /// number, and equally long runs order like their characters.
    /// </summary>
    private static int CompareDigitRuns(string left, string right)
    {
        string x = left.TrimStart('0');
        string y = right.TrimStart('0');
        if (x.Length != y.Length)
        {
            return x.Length.CompareTo(y.Length);
        }
        return Math.Sign(string.CompareOrdinal(x, y));
    }
}
/// <summary>
/// Demo: prints the essay names in the order defined by EssayNameComparer.
/// </summary>
public class Program
{
    public static void Main(string[] args)
    {
        var essays = new List<Essay>
        {
            new Essay { ID = 1, Name = "ccccc" },
            new Essay { ID = 2, Name = "aaaa" },
            new Essay { ID = 3, Name = "bbbb" },
            new Essay { ID = 4, Name = "10" },
            new Essay { ID = 5, Name = "1" },
            new Essay { ID = 6, Name = "2" },
            new Essay { ID = 7, Name = "1a" }
        };

        var ordered = essays.OrderBy(e => e.Name, new EssayNameComparer());
        foreach (string name in ordered.Select(e => e.Name))
        {
            Console.WriteLine(name);
        }
    }
}
Output:
aaaa
bbbb
ccccc
1
1a
2
10

Is it required to check before replacing a string in StringBuilder (using functions like "Contains" or "IndexOf")?

Is there a method like IndexOf or Contains on StringBuilder in C#? Below is the code:
// Copy the input into a mutable builder and apply the first replacement unconditionally.
var sb = new StringBuilder(mystring);
sb.Replace("abc", "a");
// Snapshot of the builder's contents taken after the first replacement only.
string dateFormatString = sb.ToString();
// Each check below materializes the whole builder as a new string via ToString()
// just to test for the substring before replacing it.
if (sb.ToString().Contains("def"))
{
sb.Replace("def", "aa");
}
if (sb.ToString().Contains("ghi"))
{
sb.Replace("ghi", "assd");
}
As you might have noticed I am using ToString() above again and again which I want to avoid as it is creating new string everytime. Can you help me how can I avoid it?
If the StringBuilder doesn't contain "def" then performing the replacement won't cause any problems, so just use:
// Replace returns the same builder, so the calls can be chained; a replacement whose
// search text does not occur simply changes nothing.
var sb = new StringBuilder(mystring)
    .Replace("abc", "a")
    .Replace("def", "aa")
    .Replace("ghi", "assd");
There's no such method in StringBuilder but you don't need the Contains tests. You can simply write it like this:
// No Contains test needed: each Replace is a no-op when its search text is absent.
sb.Replace("abc", "a")
  .Replace("def", "aa")
  .Replace("ghi", "assd");
If the string in the first parameter to Replace is not found then the call to Replace is a null operation—exactly what you want.
The documentation states:
Replaces all occurrences of a specified string in this instance with another specified string.
The way you read this is that when there are no occurrences, nothing is done.
You can write a class that extends methods to the StringBuilder object. Here, I have added IndexOf, Substring, and other methods to the StringBuilder class. Just put this class in your project.
using System;
using System.Text;
namespace Helpers
{
/// <summary>
/// Adds IndexOf, IsStringAt, AreEqual, and Substring to all StringBuilder objects.
/// </summary>
/// <remarks>
/// All comparisons are character-by-character (ordinal) and read the builder
/// through its indexer, without converting it to a string first.
/// </remarks>
public static class StringBuilderExtension
{
    private const int NotFound = -1;

    /// <summary>
    /// Finds the first occurrence of <paramref name="value"/> in the builder.
    /// </summary>
    /// <param name="theStringBuilder">The builder to search.</param>
    /// <param name="value">The text to look for.</param>
    /// <returns>
    /// The zero-based index of the first occurrence, or -1 when it is not found or when
    /// the builder is null or <paramref name="value"/> is null or empty.
    /// </returns>
    public static int IndexOf(this StringBuilder theStringBuilder, string value)
    {
        return IndexOf(theStringBuilder, value, 0);
    }

    /// <summary>
    /// Finds the first occurrence of <paramref name="value"/> at or after
    /// <paramref name="startPosition"/>.
    /// </summary>
    /// <param name="theStringBuilder">The builder to search.</param>
    /// <param name="value">The text to look for.</param>
    /// <param name="startPosition">The index at which the search starts.</param>
    /// <returns>
    /// The zero-based index of the occurrence, or -1 when it is not found, when the builder
    /// is null, when <paramref name="value"/> is null or empty, or when
    /// <paramref name="value"/> is longer than the builder.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="startPosition"/> is negative and a search would have to be performed.
    /// </exception>
    public static int IndexOf(this StringBuilder theStringBuilder, string value, int startPosition)
    {
        if (theStringBuilder == null || String.IsNullOrEmpty(value))
        {
            return NotFound;
        }

        // Last index at which value still fits inside the builder
        int lastStart = theStringBuilder.Length - value.Length;
        if (lastStart < 0)
        {
            return NotFound;
        }
        if (startPosition < 0)
        {
            throw new ArgumentOutOfRangeException("startPosition");
        }

        for (int start = startPosition; start <= lastStart; start++)
        {
            if (MatchesAt(theStringBuilder, value, start))
            {
                return start;
            }
        }
        return NotFound;
    }

    /// <summary>
    /// Returns the given range of the builder as a string, or null when the builder is null.
    /// </summary>
    /// <param name="theStringBuilder">The builder to read from.</param>
    /// <param name="startIndex">Index of the first character.</param>
    /// <param name="length">Number of characters.</param>
    public static string Substring(this StringBuilder theStringBuilder, int startIndex, int length)
    {
        return theStringBuilder == null ? null : theStringBuilder.ToString(startIndex, length);
    }

    /// <summary>
    /// Tests whether the builder's content equals <paramref name="compareString"/>.
    /// </summary>
    /// <param name="theStringBuilder">The builder to compare.</param>
    /// <param name="compareString">The string to compare with.</param>
    /// <returns>
    /// <c>true</c> when both have the same characters, or when both are null.
    /// </returns>
    public static bool AreEqual(this StringBuilder theStringBuilder, string compareString)
    {
        if (theStringBuilder == null)
        {
            return compareString == null;
        }
        if (compareString == null)
        {
            return false;
        }
        return theStringBuilder.Length == compareString.Length
            && MatchesAt(theStringBuilder, compareString, 0);
    }

    /// <summary>
    /// Compares one string to part of another string.
    /// </summary>
    /// <param name="haystack">The builder to look in.</param>
    /// <param name="needle">Needle to look for</param>
    /// <param name="position">Looks to see if the needle is at position in haystack</param>
    /// <returns>
    /// <c>true</c> when the characters of <paramref name="haystack"/> starting at
    /// <paramref name="position"/> equal <paramref name="needle"/>; <c>false</c> when the
    /// needle does not fit there or <paramref name="position"/> is negative. When
    /// <paramref name="haystack"/> is null the result is <c>needle == null</c>.
    /// </returns>
    public static bool IsStringAt(this StringBuilder haystack, string needle, int position)
    {
        if (haystack == null)
        {
            return needle == null;
        }
        if (needle == null)
        {
            return false;
        }

        // Written as a subtraction so that a very large position cannot overflow the check.
        if (position < 0 || position > haystack.Length - needle.Length)
        {
            return false;
        }
        return MatchesAt(haystack, needle, position);
    }

    /// <summary>
    /// True when every character of <paramref name="text"/> equals the builder's character
    /// at the same offset from <paramref name="start"/>. The caller guarantees the range fits.
    /// </summary>
    private static bool MatchesAt(StringBuilder builder, string text, int start)
    {
        for (int offset = 0; offset < text.Length; offset++)
        {
            if (builder[start + offset] != text[offset])
            {
                return false;
            }
        }
        return true;
    }
}
}
IMHO you don't have to use StringBuilder in this case... StringBuilder is more useful when used in a loop. As Microsoft says in this article:
The String object is immutable. Every
time you use one of the methods in the
System.String class, you create a new
string object in memory, which
requires a new allocation of space for
that new object. In situations where
you need to perform repeated
modifications to a string, the
overhead associated with creating a
new String object can be costly. The
System.Text.StringBuilder class can be
used when you want to modify a string
without creating a new object. For
example, using the StringBuilder class
can boost performance when
concatenating many strings together in
a loop
So you can simply use String and avoid calling ToString()...

Any Framework functions helping to find the longest common starting substring of multiple strings?

I have a list of strings (which represent paths) which should all have a common beginning (root path). I need to get that common beginning.
That's just a couple of lines to throw together, but I have the nagging feeling that this must be thrown together a million times a year and that there might be an algorithm in the framework that can be used for that, but couldn't find something.
Also, I suppose this has been asked on SO before, but I came up dry.
Any hints?
If anyone is interested, here's what I came up with:
/// <summary>
/// Returns the leading substring shared by every entry of <paramref name="strings"/>.
/// </summary>
/// <param name="strings">Strings to compare.</param>
/// <returns>
/// An empty string for an empty list, the only entry for a single-entry list,
/// otherwise the prefix of the first entry that IsCommonChar accepts character by character.
/// </returns>
public static string GetCommonStartingSubString(IList<string> strings)
{
    switch (strings.Count)
    {
        case 0:
            return "";
        case 1:
            return strings[0];
    }

    // Grow the prefix length for as long as every string agrees on the next character.
    int prefixLength;
    for (prefixLength = 0; IsCommonChar(strings, prefixLength); prefixLength++)
    {
    }
    return strings[0].Substring(0, prefixLength);
}
/// <summary>
/// Tells whether every string has, at index <paramref name="charIdx"/>, the same
/// character as the first string.
/// </summary>
/// <param name="strings">Strings to compare; the first entry is the reference.</param>
/// <param name="charIdx">Zero-based character index to test.</param>
/// <returns>
/// False when any string is too short to have that index or holds a different
/// character there; true otherwise.
/// </returns>
private static bool IsCommonChar(IList<string> strings, int charIdx)
{
    string reference = strings[0];
    if (charIdx >= reference.Length)
    {
        return false;
    }

    int total = strings.Count;
    for (int i = 1; i < total; i++)
    {
        string candidate = strings[i];
        if (charIdx >= candidate.Length || candidate[charIdx] != reference[charIdx])
        {
            return false;
        }
    }
    return true;
}
This method should work:
// Folds the pairwise GetLongestCommonPrefix over all items. The fold starts from null,
// which the pairwise overload treats as "no prefix yet".
string GetLongestCommonPrefix(IEnumerable<string> items)
{
    string seed = null;
    return items.Aggregate(seed, (prefix, item) => GetLongestCommonPrefix(prefix, item));
}
// Returns the leading characters that s1 and s2 share.
// When one argument is null the other one is returned unchanged (null if both are null).
string GetLongestCommonPrefix(string s1, string s2)
{
    if (s1 == null)
    {
        return s2;
    }
    if (s2 == null)
    {
        return s1;
    }

    // Only the overlap of the two strings can match.
    int limit = Math.Min(s1.Length, s2.Length);
    int matched = 0;
    while (matched < limit && s1[matched] == s2[matched])
    {
        matched++;
    }
    return s1.Substring(0, matched);
}
Excuse my ordinary variable naming, and it's not very fast, but this should do:
// your list of strings...
List<string> strings;
// The common prefix can never be longer than the shortest string, so start from the
// first string of minimal length. The minimum is computed once, not once per element.
// An empty list yields an empty prefix instead of throwing.
int minLength = strings.Count == 0 ? 0 : strings.Min(s => s.Length);
string shortestString = strings.FirstOrDefault(s => s.Length == minLength) ?? string.Empty;
// Drop one trailing character at a time until every string starts with the candidate.
// Ordinal comparison keeps this an exact character match; the culture-sensitive default
// can report a prefix match for characters the culture treats as ignorable.
while (!strings.All(s => s.StartsWith(shortestString, System.StringComparison.Ordinal)))
{
    shortestString = shortestString.Substring(0, shortestString.Length - 1);
}
One idea to simplify your implementation is to write just a method to get the longest substring of two strings and then use Aggregate method from LINQ. Something like:
// Fold over the list: seed with the first string, then combine the running result
// with each remaining string through GetCommonSubString.
strings.Skip(1).Aggregate(strings.First(), GetCommonSubString);
I don't think there is any elegant way to implement GetCommonSubString using standard methods for working with strings. If you care about performance, then you'll probably have to implement it in the "direct" way. A slower, but shorter alternative using LINQ could look something like this:
// Pair the characters of both strings by position and remember whether each pair agrees.
var pairs = str1.Zip(str2, (left, right) => new { Same = left == right, Value = left });
// Keep only the leading run of agreeing pairs and rebuild a string from their characters.
var prefix = pairs.TakeWhile(pair => pair.Same).Select(pair => pair.Value);
return new string(prefix.ToArray());
This first "zips" the two strings and then takes parts where the characters are the same using TakeWhile. The rest generates an array of characters that can be used to create a string with the result.
Maybe I simplify your problem too much but what about
// Picks the path with the fewest backslash-separated segments (the first one on ties).
// NOTE(review): that path is the common root only if it is an ancestor of all the others.
var rootPath = paths
    .Select(candidate => new { path = candidate, depth = candidate.Split('\\').Length })
    .Aggregate((shallowest, next) =>
    {
        if (next.depth < shallowest.depth)
        {
            return next;
        }
        return shallowest;
    })
    .path;
Desperate, most probably slow, and all around pretty silly try
var paths = new List<string> { @"C:\Ruby19\lib\ruby\gems",
                               @"C:\Ruby19\lib\ruby\gems\1.9.2",
                               @"C:\Ruby19\lib\ruby\gems",
                               @"C:\Ruby19\lib\test\fest\hest"};
// Split every path into its segments, then fold pairwise: keep only the leading segments
// of the current path that equal the segment at the same index of the running result.
// The surviving segments are glued back together with string.Join.
var rootPath = string.Join("\\",
    paths.Select(s => new { p = s.Split('\\') })
         .Aggregate((memo, curr) => new { p = curr.p.TakeWhile((stp, ind) => stp == memo.p.ElementAtOrDefault(ind)).ToArray() })
         .p);
=> rootPath = "C:\Ruby19\lib"
I had the same problem (like many others) some time ago. Here is the solution I came up with. I didn't make any performance measurements, but I haven't had any problems with lists of 100 elements.
using System;
using System.Collections.Generic;
using System.Linq;
namespace FEV.TOPexpert.Common.Extensions
{
    /// <summary>
    /// Extension methods that find the leading text strings have in common.
    /// </summary>
    public static class IEnumerableOfStringExtension
    {
        /// <summary>
        /// Finds the most common left string in a sequence of strings.
        /// </summary>
        /// <param name="source">The sequence to search in.</param>
        /// <returns>The most common left string in the sequence.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public static string MostCommonLeftString(this IEnumerable<string> source)
        {
            return source.MostCommonLeftString(StringComparison.InvariantCulture);
        }

        /// <summary>
        /// Finds the most common left string in a sequence of strings.
        /// </summary>
        /// <param name="source">The sequence to search in.</param>
        /// <param name="comparisonType">Type of the comparison.</param>
        /// <returns>
        /// The most common left string in the sequence; an empty string for an empty
        /// sequence; null as soon as a null element takes part in a comparison.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public static string MostCommonLeftString(this IEnumerable<string> source, StringComparison comparisonType)
        {
            if (source == null)
                throw new ArgumentNullException("source");

            using (var e = source.GetEnumerator())
            {
                if (!e.MoveNext())
                    return String.Empty;

                // Start with the first element and narrow it against every following one.
                string mcs = e.Current;
                while (e.MoveNext())
                    mcs = mcs.MostCommonLeftString(e.Current, comparisonType);
                return mcs;
            }
        }

        /// <summary>
        /// Returns a sequence with the most common left strings from a sequence of strings.
        /// </summary>
        /// <param name="source">A sequence of string to search through.</param>
        /// <returns>A sequence of the most common left strings ordered in descending order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public static IEnumerable<string> MostCommonLeftStrings(this IEnumerable<string> source)
        {
            return MostCommonLeftStrings(source, StringComparison.InvariantCulture);
        }

        /// <summary>
        /// Returns a sequence with the most common left strings from a sequence of strings.
        /// </summary>
        /// <param name="source">A sequence of string to search through. Null elements are skipped.</param>
        /// <param name="comparisonType">Type of comparison.</param>
        /// <returns>A sequence of the most common left strings ordered in descending order of length.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public static IEnumerable<string> MostCommonLeftStrings(this IEnumerable<string> source, StringComparison comparisonType)
        {
            if (source == null)
                throw new ArgumentNullException("source");

            var listOfMcs = new List<string>();
            foreach (var current in source)
            {
                if (current == null)
                    continue;

                // Either an existing entry is narrowed to what it shares with the current
                // string, or the current string starts a group of its own.
                if (!MatchesWithList(current, listOfMcs, comparisonType))
                    listOfMcs.Add(current);
            }
            return listOfMcs.OrderByDescending(item => item.Length);
        }

        /// <summary>
        /// Returns a string that both strings have in common started from the left.
        /// </summary>
        /// <param name="first">The first string.</param>
        /// <param name="second">The second string.</param>
        /// <returns>Returns a string that both strings have in common started from the left.</returns>
        public static string MostCommonLeftString(this string first, string second)
        {
            return first.MostCommonLeftString(second, StringComparison.InvariantCulture);
        }

        /// <summary>
        /// Returns a string that both strings have in common started from the left.
        /// </summary>
        /// <param name="first">The first string.</param>
        /// <param name="second">The second string.</param>
        /// <param name="comparisonType">Type of comparison.</param>
        /// <returns>
        /// The longest leading part of <paramref name="first"/> that compares equal to the
        /// equally long leading part of <paramref name="second"/>; null when either string is null.
        /// </returns>
        public static string MostCommonLeftString(this string first, string second, StringComparison comparisonType)
        {
            if (first == null
                || second == null)
                return null;

            // Begin with the longest length both strings can offer and shrink it until the
            // two leading parts compare equal. Comparing in place avoids creating two new
            // substrings for every character that has to be dropped.
            int length = Math.Min(first.Length, second.Length);
            while (length > 0
                   && String.Compare(first, 0, second, 0, length, comparisonType) != 0)
            {
                length--;
            }
            return first.Substring(0, length);
        }

        /// <summary>
        /// Looks for the first entry of <paramref name="elements"/> that shares a non-empty
        /// left string with <paramref name="match"/> and replaces that entry with the shared part.
        /// </summary>
        /// <param name="match">The string to compare against the entries.</param>
        /// <param name="elements">The entries collected so far; modified in place.</param>
        /// <param name="comparisonType">Type of comparison.</param>
        /// <returns>True when an entry shared a non-empty left string; otherwise false.</returns>
        private static bool MatchesWithList(string match, IList<string> elements, StringComparison comparisonType)
        {
            string removeFromList = String.Empty;
            string addToList = String.Empty;
            foreach (var element in elements)
            {
                addToList = match.MostCommonLeftString(element, comparisonType);
                if (addToList.Length > 0)
                {
                    removeFromList = element;
                    // Stop at the first hit so addToList and removeFromList describe the same entry.
                    break;
                }
            }

            if (removeFromList.Length <= 0)
                return false;

            // The list is only modified after the enumeration above has finished.
            if (addToList != removeFromList)
            {
                elements.Remove(removeFromList);
                elements.Add(addToList);
            }
            return true;
        }
    }
}
The following returns the longest common prefix of any set of IEnumerable<T> not just strings.
/// <summary>
/// Tells whether all elements of a sequence are equal to its first element.
/// </summary>
/// <param name="xs">The sequence to inspect; it is enumerated exactly once.</param>
/// <returns>True for an empty sequence or when every element equals the first one.</returns>
public static bool Same<T>(this IEnumerable<T> xs) {
    using (var e = xs.GetEnumerator()) {
        if (!e.MoveNext())
            return true;
        var first = e.Current;
        // The default comparer copes with null elements, which x.Equals(...) would not.
        var comparer = EqualityComparer<T>.Default;
        while (e.MoveNext())
            if (!comparer.Equals(e.Current, first))
                return false;
        return true;
    }
}
/// <summary>
/// Returns the leading elements that all inner sequences have in common.
/// </summary>
/// <param name="xss">The sequences to compare.</param>
/// <returns>The common prefix; empty when there are no sequences or nothing is shared.</returns>
public static IEnumerable<T> CommonPrefix<T>(this IEnumerable<IEnumerable<T>> xss) {
    var r = new List<T>();
    var es = xss.Select(x => x.GetEnumerator()).ToList();
    try {
        // All() is true for an empty list, so without this guard the loop below never ends.
        if (es.Count == 0)
            return r;
        // Step every enumerator forward together; stop once one sequence runs out
        // or the current elements are no longer all the same.
        while (es.All(e => e.MoveNext())) {
            if (!es.Select(e => e.Current).Same())
                return r;
            // The elements agree, so any of them can represent this position.
            r.Add(es[0].Current);
        }
        return r;
    } finally {
        foreach (var e in es)
            e.Dispose();
    }
}
}

Categories

Resources