How would I change the string
aboutUs
To
about-us
I'd like to be able to do this with regex replace if possible.
I've tried:
/// <summary>
/// Converts a camelCase or PascalCase identifier to hyphen-case,
/// e.g. "aboutUs" becomes "about-us".
/// </summary>
/// <param name="source">The text to convert.</param>
/// <returns>The lower-cased text with a hyphen before every upper-case A-Z that is not the first character.</returns>
public static string ToHypenCase(this string source)
{
    // "$1" needs a capture group, and the pattern "[A-Z]" has none, so .NET emitted "$1" literally.
    // "$0" stands for the whole match; the look-behind keeps a leading capital from getting a hyphen.
    return Regex.Replace(source, @"(?<!^)[A-Z]", "-$0").ToLower();
}
You can do it with a combination of regex and ToLower(), like this:
// Put a hyphen at every lower-to-upper boundary, then lower-case the whole string.
string s = "quickBrownFoxJumpsOverTheLazyDog";
string res = Regex.Replace(s, @"([a-z])([A-Z])", "$1-$2").ToLower();
Console.WriteLine(res); // quick-brown-fox-jumps-over-the-lazy-dog
Demo on ideone.
If you need a fast solution with low allocation (it also handles @PeterL's cases):
/// <summary>
/// Converts camelCase/PascalCase text to dash syntax ("quickBrownFox" becomes "quick-brown-fox")
/// using a pooled scratch buffer instead of intermediate strings.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <returns>The lower-cased text with a dash before every upper-case character except the first; null or empty input is returned unchanged.</returns>
public static string ConvertFromCamelCaseToDashSyntax(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // Worst case every character after the first gets a dash, so 2 * Length always suffices.
    // The previous "Length + 10" overflowed the rented array once more than ten dashes were needed
    // and the pool did not happen to hand back a larger array.
    var buffer = ArrayPool<char>.Shared.Rent(text.Length * 2);
    try
    {
        var resultLength = 0;
        for (var i = 0; i < text.Length; i++)
        {
            if (i > 0 && char.IsUpper(text[i]))
            {
                buffer[resultLength++] = '-';
            }

            buffer[resultLength++] = char.ToLowerInvariant(text[i]);
        }

        return new string(buffer, 0, resultLength);
    }
    finally
    {
        // Always hand the array back, even if the conversion throws.
        ArrayPool<char>.Shared.Return(buffer);
    }
}
Benchmark:
Method
Text
Mean
Error
StdDev
Ratio
Gen 0
Allocated
UsingRegex
quick(...)zyDog [32]
1,894.7 ns
2.38 ns
2.11 ns
1.00
0.0114
208 B
UsingArrayPoolAndSpans
quick(...)zyDog [32]
106.3 ns
0.23 ns
0.20 ns
0.06
0.0062
104 B
You can change your extension method like this:
/// <summary>
/// Inserts a hyphen before every upper-case character that follows a non-whitespace character
/// and lower-cases that character, e.g. "aboutUs" becomes "about-us".
/// </summary>
/// <param name="source">The text to convert.</param>
/// <returns>
/// The converted text. The first character and any upper-case character that directly follows
/// whitespace are copied unchanged.
/// </returns>
public static string ToHypenCase(this string source)
{
    if (source.Length == 0)
    {
        return source;
    }

    // Single pass into a buffer sized for the worst case (a hyphen before every character).
    // The earlier List<char>.Insert approach was quadratic and never terminated for upper-case
    // characters that have no lower-case form, because the same character was seen again.
    var buffer = new char[source.Length * 2];
    var length = 0;
    buffer[length++] = source[0];

    for (int i = 1; i < source.Length; i++)
    {
        char current = source[i];
        if (char.IsUpper(current) && !char.IsWhiteSpace(source[i - 1]))
        {
            buffer[length++] = '-';
            buffer[length++] = char.ToLower(current);
        }
        else
        {
            buffer[length++] = current;
        }
    }

    return new string(buffer, 0, length);
}
Or just use LINQ:
// Map each character to its output fragment ("-" plus the lower-cased character for
// upper-case input, the character itself otherwise) and concatenate the fragments.
var result = string.Concat(
    "aboutUs".Select(ch => char.IsUpper(ch) ? "-" + char.ToLower(ch) : ch.ToString()));
My version using a StringBuilder...
/// <summary>
/// Lower-cases every upper-case character and puts a "-" in front of it,
/// except when nothing has been written to the output yet.
/// </summary>
/// <param name="s">The text to convert.</param>
/// <returns>The converted text; null or empty input is returned as-is.</returns>
public static string UrlConvert(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return s;
    }

    var output = new StringBuilder();
    for (var index = 0; index < s.Length; index++)
    {
        var current = s[index];
        if (!char.IsUpper(current))
        {
            output.Append(current);
            continue;
        }

        // Only separate when something precedes this character in the output.
        if (output.Length != 0)
        {
            output.Append("-");
        }

        output.Append(char.ToLower(current));
    }

    return output.ToString();
}
And xUnit tests...
// Each row pairs an input with the hyphenated, lower-cased form expected from Meta.UrlConvert.
[Theory]
[InlineData("same", "same")]
[InlineData("Simple", "simple")]
[InlineData("MultiWord", "multi-word")]
[InlineData("UsingAnAWord", "using-an-a-word")]
[InlineData("SomeDigit4Used2", "some-digit4-used2")]
public void UrlConvertWorks(string input, string expected)
{
    var actual = Meta.UrlConvert(input);

    Assert.Equal(expected, actual);
}
Related
I need some help with JS code translation to C#.
JS Code:
/**
 * Decodes a string: the input is passed through escape() and then
 * decodeURIComponent(); afterwards every character whose code is above 132 is
 * replaced by two characters derived from the decimal digits of that code.
 *
 * @param {string} string - The encoded text.
 * @returns {string} The decoded text.
 */
function Decription(string) {
    var decoded = decodeURIComponent(escape(string));
    var pieces = [];
    for (var idx = 0; idx < decoded.length; idx += 1) {
        var code = decoded.charCodeAt(idx);
        if (code <= 132) {
            // Low codes pass through untouched.
            pieces.push(decoded.charAt(idx));
            continue;
        }
        // Split the decimal representation: everything but the last two digits is the
        // first character code, the last two digits plus 31 is the second one.
        var digits = code.toString(10);
        var split = digits.length - 2;
        var head = parseInt(digits.substring(0, split), 10);
        var tail = parseInt(digits.substring(split, digits.length), 10) + 31;
        pieces.push(String.fromCharCode(head) + String.fromCharCode(tail));
    }
    return pieces.join('');
}
And I tried to translate it to C#:
/// <summary>
/// C# port of the JavaScript Decription function: characters whose code is above 132 are
/// expanded into two characters derived from the decimal digits of that code.
/// </summary>
/// <param name="encriptedText">The encoded text.</param>
/// <returns>The decoded text.</returns>
private string Decription(string encriptedText)
{
    // JavaScript's escape() followed by decodeURIComponent() treats each character as one byte
    // and decodes the byte sequence as UTF-8. Regex.Escape is unrelated (it escapes regex
    // metacharacters), so the same effect is obtained by re-encoding through ISO-8859-1.
    // NOTE(review): JavaScript throws on malformed sequences where this substitutes U+FFFD - confirm
    // that difference is acceptable for the real input.
    byte[] rawBytes = System.Text.Encoding.GetEncoding("ISO-8859-1").GetBytes(encriptedText);
    string text = System.Text.Encoding.UTF8.GetString(rawBytes);

    var newString = new System.Text.StringBuilder(text.Length);
    foreach (char current in text)
    {
        int ch = current;
        if (ch > 132)
        {
            // ch > 132 guarantees at least three decimal digits.
            string codeStr = Convert.ToString(ch, 10);
            int firstCharCode = Convert.ToInt32(codeStr.Substring(0, codeStr.Length - 2), 10);
            // Substring's second argument is a length in C#, not an end index as in JavaScript,
            // so the one-argument overload is used to take the last two digits.
            int lastCharCode = Convert.ToInt32(codeStr.Substring(codeStr.Length - 2), 10) + 31;
            // (char)code is the equivalent of String.fromCharCode(code).
            newString.Append((char)firstCharCode).Append((char)lastCharCode);
        }
        else
        {
            newString.Append(current);
        }
    }

    return newString.ToString();
}
But how I can translate this row:
newString += String.fromCharCode(firstCharCode) + String.fromCharCode(lastCharCode);
Do you know of an equivalent to String.fromCharCode() in C#?
Here you go:
public static void Main()
{
    // 97 is the character code of 'a'.
    int code = 97;
    Console.WriteLine(StringFromCharCode(code));
}

/// <summary>
/// C# counterpart of JavaScript's String.fromCharCode for a single code.
/// </summary>
/// <param name="code">The character code; it is cast to <c>char</c> without a range check.</param>
/// <returns>A one-character string.</returns>
public static string StringFromCharCode(int code) => ((char)code).ToString();
Demo
Note that you may want to use a StringBuilder instead of concatenating strings in your case.
Related to Int to Char in C# (Read it. It includes a nice comment about cast vs Convert.ToChar)
You can simply use Convert.ToChar method, Convert.ToChar(97) will return a.
I'm trying to create permutations of the Word "password", I have managed to create all permutations of the word with the code below. Currently this does Upper cases and lower cases. I need to Include permutations that have special characters for example where I will have "p#ssword". Where I'm replacing "a" with "#" and "o" "0".
Any idea how I can expand my implementation?
/// <summary>
/// Writes every upper/lower-case variant of <paramref name="pwd"/> to the console, one per line.
/// </summary>
/// <param name="pwd">The word to vary; it is lower-cased first.</param>
/// <remarks>
/// Bit <c>b</c> of the running mask decides whether the character at index <c>b</c> is upper-cased.
/// Casing by position, rather than replacing the first occurrence of a letter, is what makes words
/// with repeated letters (the two 's' in "password") produce every variant exactly once.
/// </remarks>
public static void WritePermutations(string pwd)
{
    pwd = pwd.ToLower();

    // NOTE: 1 << Length only fits in an int for words of up to 30 characters.
    int combinations = 1 << pwd.Length;
    var buffer = new char[pwd.Length];

    for (int mask = 0; mask < combinations; mask++)
    {
        for (int index = 0; index < pwd.Length; index++)
        {
            bool upper = (mask & (1 << index)) != 0;
            buffer[index] = upper ? char.ToUpper(pwd[index]) : pwd[index];
        }

        Console.WriteLine(new string(buffer));
    }
}
/// <summary>
/// Replaces the first occurrence of <paramref name="Find"/> in <paramref name="Source"/>
/// with <paramref name="Replace"/>.
/// </summary>
/// <param name="Source">The text to search.</param>
/// <param name="Find">The text to look for.</param>
/// <param name="Replace">The replacement text.</param>
/// <returns>The modified text, or <paramref name="Source"/> unchanged when nothing was found.</returns>
public static string ReplaceFirstOccurrence(string Source, string Find, string Replace)
{
    // Ordinal search so the match length is exactly Find.Length, which Remove relies on.
    int Place = Source.IndexOf(Find, StringComparison.Ordinal);
    if (Place < 0)
    {
        // Previously Remove(-1, ...) threw ArgumentOutOfRangeException here.
        return Source;
    }

    return Source.Remove(Place, Find.Length).Insert(Place, Replace);
}
How about starting off with these declarations and functions:
// Groups of characters that may stand in for one another.
List<HashSet<char>> _charCombinations = new List<HashSet<char>>
{
    new HashSet<char> { 'a', '#' },
    new HashSet<char> { 'o', '0' },
};

// Returns every character that may replace c: the members of each substitution group
// containing c, the upper- and lower-case forms of c when it is a letter, and c itself.
HashSet<char> GetAlternatives(char c)
{
    var alternatives = new HashSet<char>();

    foreach (var group in _charCombinations)
    {
        if (!group.Contains(c))
        {
            continue;
        }

        alternatives.UnionWith(group);
    }

    if (char.IsLetter(c))
    {
        alternatives.Add(char.ToUpper(c));
        alternatives.Add(char.ToLower(c));
    }
    // Any other char.Is-based rules would go here.

    alternatives.Add(c);
    return alternatives;
}
// Lazily yields every variant of the tail of s that begins at the 1-based position start,
// combining each alternative for the current character with every variant of the rest.
IEnumerable<string> GetTransformations(string s, int start)
{
    bool isLastPosition = start == s.Length;

    foreach (char alternative in GetAlternatives(s[start - 1]))
    {
        if (isLastPosition)
        {
            yield return String.Empty + alternative;
            continue;
        }

        foreach (string tail in GetTransformations(s, start + 1))
        {
            yield return alternative + tail;
        }
    }
}
You can then use them like this:
// Materialize all variants of "password" (positions are 1-based) and sort them ordinally.
var result = new List<string>(GetTransformations("password", 1));
result.Sort(string.CompareOrdinal);
This produces 576 strings (too long to list here), which is exactly what you'd expect for an 8-letter word where 6 of the chars have 2 possibilities, and the other 2 have 3 possibilities, i.e. 2x2x2x2x2x2x3x3
I'm looking for a C# function that converts a string to lowercase but keeps any word that was originally all uppercase in uppercase.
"Pippo, pluto. paperino" -> "pippo, pluto. paperino"
"Pippo, PLUTO. paperino" -> "pippo, PLUTO. paperino"
But if every word is uppercase, the whole string should be converted to lowercase.
"PIPPO, PLUTO. PAPERINO" -> "pippo, pluto. paperino"
I found the solution below. I've also taken all the separators into account. What do you think?
// Lower-cases sourceString word by word, but restores words that were written entirely in
// "upper" characters; if every word was such a word, the whole string is lower-cased instead.
// Words are delimited by . , ; : ! ? line breaks and any other whitespace.
private static string buildSourceString(string sourceString)
{
// Sentinel separator so the last word is flushed by the loop; removed again before returning.
sourceString += " ";
var sb = new StringBuilder(sourceString.Length);
// Length of the word currently being read.
int nOfCharWord = 0;
// Number of characters in the current word that were equal to their upper-case form
// (this also counts characters without case, such as digits).
int nOfCharLower = 0;
// Number of words seen so far.
int nWord = 0;
// Number of words consisting only of such "upper" characters.
int nWordUpper = 0;
foreach (var c in sourceString)
{
if (c == '.' | c == ',' | c == ';' | c == ':' | c == '!' | c == '?' | c == '\n' | c == '\r' || Char.IsWhiteSpace(c))
{
if (nOfCharWord > 0)
nWord++;
if (nOfCharWord > 0 && nOfCharWord.Equals(nOfCharLower))
{
// Every character of the word was "upper": undo the lower-casing done while reading it.
for (int i = sb.Length - nOfCharWord; i < sb.Length; i++)
{
sb[i] = char.ToUpper(sb[i]);
}
nWordUpper++;
}
// reset the per-word counters
nOfCharWord = 0;
nOfCharLower = 0;
sb.Append(c);
}
else
{
if (c.ToString().Equals(c.ToString().ToUpper()))
{
// Store the lower-case form for now; it is restored above if the whole word qualifies.
sb.Append(char.ToLower(c));
nOfCharLower++;
}
else
{
sb.Append(c);
}
nOfCharWord++;
}
}
// All words were upper-case (or there were no words): lower-case the entire text.
if (nWord == nWordUpper)
{
sb = new StringBuilder(sourceString.Length);
sb = sb.Append(sourceString.ToLower());
}
// Drop the sentinel space appended at the top.
sb.Remove(sb.Length - 1, 1);
return sb.ToString();
}
/// <summary>
/// Tells whether <paramref name="input"/> contains no letter that is not upper-case.
/// Non-letters are ignored, so text without letters counts as all upper-case.
/// </summary>
public static bool IsAllUpper(string input)
{
    foreach (char c in input)
    {
        if (Char.IsLetter(c) && !Char.IsUpper(c))
        {
            return false;
        }
    }

    return true;
}

/// <summary>
/// Lower-cases the space-separated words of <paramref name="input"/> but keeps words written
/// entirely in upper-case, unless the whole text is upper-case, in which case everything is lower-cased.
/// </summary>
/// <param name="input">The text to convert.</param>
/// <returns>The converted text, with the original spacing preserved.</returns>
public static string GetString(String input)
{
    // "PIPPO, PLUTO. PAPERINO" -> "pippo, pluto. paperino": when everything is upper-case
    // there is no emphasis to preserve.
    if (IsAllUpper(input))
    {
        return input.ToLower();
    }

    var words = input.Split(' ');
    for (int i = 0; i < words.Length; i++)
    {
        if (!IsAllUpper(words[i]))
        {
            words[i] = words[i].ToLower();
        }
    }

    // Join puts separators only between words; the former "result += ' ' + item" loop
    // produced a stray leading space.
    return string.Join(" ", words);
}
// Prints the converted form of the three sample sentences from the question.
public static void Main()
{
    String[] samples =
    {
        "Pippo, pluto. paperino",
        "Pippo, PLUTO. paperino",
        "PIPPO, PLUTO. PAPERINO",
    };

    foreach (String sample in samples)
    {
        Console.WriteLine(GetString(sample));
    }
}
This will convert any lower-case words at the beginning of sentences to make the beginning of the word in caps. For example:
hi. i'm Matteo and work in IP.
will be converted to
Hi. I'm Matteo and work in IP.
Similarly,
hi. i'm Matteo and work in IP. please direct your QUESTIONS to me. feel free to email, Or phone.
will convert to:
Hi. I'm Matteo and work in IP. Please direct your QUESTIONS to me. Feel free to email, Or phone.
The best way to approach this sort of specific string manipulation in your question would be to use the Regex class. I'd suggest looking at MSDN's minimal documentation of how to use regular expressions in .NET. If you're not familiar with how to use regular expressions in general, there is much help that can be found via your favorite search engine.
I'll illustrate using your last example. To dovetail into an example provided by MSDN here (illustrating a Replace() overload):
// Demo: capitalizes the first lower-case letter of the text and of every word that is
// preceded by two non-word characters (such as ". ").
class Program
{
    static void Main(string[] args)
    {
        string original = "hi. i'm Matteo and work in IP";
        // \A[a-z]          : a lower-case letter at the very start of the string
        // (?<=\W{2})[a-z]  : a lower-case letter preceded by exactly two non-word characters
        Console.WriteLine(Regex.Replace(original, @"\A[a-z]|(?<=\W{2})[a-z]", new MatchEvaluator(CapText), RegexOptions.ECMAScript));
        Console.ReadKey();
    }

    // Match evaluator: returns the matched text with its first character upper-cased
    // when that character is lower-case, otherwise the text unchanged.
    static string CapText(Match match)
    {
        string tempStr = match.ToString();
        if (char.IsLower(tempStr[0]))
        {
            return char.ToUpper(tempStr[0]) + tempStr.Substring(1, tempStr.Length - 1);
        }
        return tempStr;
    }
}
The two key pieces are the regular expression in Regex.Replace and the CapText custom evaluator method.
The regular expression in Regex.Replace could be broken down as follows:
\A[a-z] finds a lowercase letter a through z at the beginning of a string.
| is the alternation / OR operand, so either logic before or after it applies.
(?<=\W{2}) is the positive lookbehind, that searches for exactly two non-word characters before the following set...
...which is the last [a-z], which searches for a lowercase letter a through z.
I love TDD :-)
Using your parameters I reach this class:
/// <summary>
/// Lower-cases every word of a text except words written entirely in upper-case letters.
/// </summary>
internal static class ToLowerEx
{
    // One run of word characters. Everything between the runs is left untouched by Replace,
    // so leading and trailing punctuation is no longer lost the way it was when the result was
    // rebuilt from "\b\W+\b" matches.
    static readonly Regex Words = new Regex(@"\w+");

    /// <summary>Returns <paramref name="text"/> with each word processed by the upper-case rule.</summary>
    /// <param name="text">The text to convert.</param>
    /// <returns>The converted text; separators and punctuation are preserved verbatim.</returns>
    public static string Get(string text)
    {
        return Words.Replace(text, match => ProcessWord(match.Value));
    }

    // Keeps a word only if every character is an upper-case letter; otherwise lower-cases it.
    private static string ProcessWord(string text)
    {
        return text.All(char.IsUpper)
            ? text
            : text.ToLower();
    }
}
Just in case you get interested, these are the tests:
// NUnit fixture for ToLowerEx.Get: each test name reads Test_<input>_<expected>.
[TestFixture]
internal class LowerTests
{
    [Test]
    public void Test_asdf_asdf()
    {
        Assert.AreEqual("asdf", ToLowerEx.Get("asdf"));
    }

    [Test]
    public void Test_Asdf_asdf()
    {
        Assert.AreEqual("asdf", ToLowerEx.Get("Asdf"));
    }

    [Test]
    public void Test_aSdf_asdf()
    {
        Assert.AreEqual("asdf", ToLowerEx.Get("aSdf"));
    }

    // A word written entirely in upper-case is kept.
    [Test]
    public void Test_ASDF_ASDF()
    {
        Assert.AreEqual("ASDF", ToLowerEx.Get("ASDF"));
    }

    [Test]
    public void Test_asdfAndqwer_asdfAandqwer()
    {
        Assert.AreEqual("asdf, qwer", ToLowerEx.Get("asdf, qwer"));
    }

    [Test]
    public void Test_AsdfAndqWer_asdfAandqwer()
    {
        Assert.AreEqual("asdf, qwer", ToLowerEx.Get("Asdf, qWer"));
    }

    [Test]
    public void Test_AsdfAndQWER_asdfAandQWER()
    {
        Assert.AreEqual("asdf, QWER", ToLowerEx.Get("Asdf, QWER"));
    }
}
You can use the code below:
// Click handler: shows the converted form of each sample sentence in a message box.
private void button1_Click(object sender, EventArgs e)
{
    String[] samples =
    {
        "Pippo, pluto. paperino",
        "Pippo, PLUTO. paperino",
        "PIPPO, PLUTO. PAPERINO",
    };

    foreach (String sample in samples)
    {
        MessageBox.Show(myStringConverter(sample));
    }
}
/// <summary>
/// Lower-cases the space-separated words of <paramref name="str"/> but keeps words whose
/// letters and digits are all upper-case letters; when every word is like that, the whole
/// text is lower-cased.
/// </summary>
/// <param name="str">The text to convert.</param>
/// <returns>The converted text, without the trailing space the loop-based version appended.</returns>
public string myStringConverter(string str)
{
    string[] words = str.Split(' '); // Words are always separated by a space.

    // Punctuation is ignored when deciding whether a word is upper-case.
    Func<string, bool> isUpperWord = word =>
        word.Where(c => Char.IsLetterOrDigit(c) || Char.IsWhiteSpace(c)).All(char.IsUpper);

    // "PIPPO, PLUTO. PAPERINO" -> "pippo, pluto. paperino"
    if (words.All(isUpperWord))
    {
        return str.ToLower();
    }

    return string.Join(" ", words.Select(word => isUpperWord(word) ? word : word.ToLower()));
}
I want to break a long string in C# into lines without splitting any words.
Example: S AAA BBBBBBB CC DDDDDD V Breaking Character on 7 Count:
S AAA
BBBBBBB
CC
DDDDDD
V
How do I do this?
// Greedy word wrap: keep appending words to the current line while it stays within
// maxWordLength characters (separator included); otherwise start a new line.
string inputStr = "S AAA BBBBBBB CC DDDDDD V ";
int maxWordLength = 7;
char separator = ' ';
string[] splitted = inputStr.Split(new[] { separator }, StringSplitOptions.RemoveEmptyEntries);

var lines = new List<string> { splitted[0] };
for (int i = 1; i < splitted.Length; i++)
{
    int last = lines.Count - 1;
    string candidate = lines[last] + separator + splitted[i];
    if (candidate.Length <= maxWordLength)
    {
        lines[last] = candidate;
    }
    else
    {
        lines.Add(splitted[i]);
    }
}

var result = lines.ToArray();
Console.WriteLine ("number of words: {0}", result.Length);
Console.WriteLine( string.Join(Environment.NewLine, result) );
prints:
number of words: 5
S AAA
BBBBBBB
CC
DDDDDD
V
Here's a shorter solution harnessing the power of regular expressions.
string input = "S AAA BBBBBBB CC DDDDDD V";
// Match up to 7 characters with optional trailing whitespace, but only on word boundaries
string pattern = @"\b.{1,7}\s*\b";
var matches = Regex.Matches(input, pattern);
foreach (var match in matches)
{
    Debug.WriteLine(match.ToString());
}
This does the trick if I've understood your question correctly. A recursive implementation would have been cooler, but tail recursion is too damn bad in C# :)
Could also be implemented with yield and IEnumerable<string>.
// Splits words on whitespace and regroups the pieces into lines of at most lenght
// characters (joined by single spaces) without ever cutting a word.
string[] splitSpecial(string words, int lenght)
{
    var lines = new List<string>();

    // null means "no line started yet".
    string current = null;

    foreach (string token in words.Split())
    {
        bool fitsOnCurrent = current != null && token.Length + 1 + current.Length <= lenght;
        if (fitsOnCurrent)
        {
            current = current + " " + token;
            continue;
        }

        // The line is full (or this is the very first token): close it and start a new one.
        if (current != null)
        {
            lines.Add(current);
        }

        current = token;
    }

    // Flush the line that was still being built.
    lines.Add(current);

    return lines.ToArray();
}
Why not to try regex?
(?:^|\s)(?:(.{1,7}|\S{7,}))(?=\s|$)
and use all captures.
C# code:
var text = "S AAA BBBBBBB CC DDDDDD V";
// Group 1 holds each line: up to 7 characters, or a longer run of non-whitespace,
// starting at the beginning or after whitespace and followed by whitespace or the end.
var matches = new Regex(@"(?:^|\s)(?:(.{1,7}|\S{7,}))(?=\s|$)").Matches(text).Cast<Match>().Select(x => x.Groups[1].Value).ToArray();
foreach (var match in matches)
{
    Console.WriteLine(match);
}
Output:
S AAA
BBBBBBB
CC
DDDDDD
V
// Greedy word wrap: a word joins the current line only while the line length plus the
// word length stays below 7; otherwise the line is stored and the word starts a new one.
string str = "S AAA BBBBBBB CC DDDDDD V";
List<string> result = new List<string>();
StringBuilder sb = new StringBuilder();

foreach (string word in str.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
{
    bool lineStarted = sb.Length > 0;
    bool fits = sb.Length + word.Length < 7;

    if (lineStarted && !fits)
    {
        result.Add(sb.ToString());
        sb.Clear();
    }
    else if (lineStarted)
    {
        sb.Append(' ');
    }

    sb.Append(word);
}

// Store whatever is left in the builder as the last line.
if (sb.Length > 0)
{
    result.Add(sb.ToString());
}
Results will contain:
S AAA
BBBBBBB
CC
DDDDDD
V
The predicate can be adjusted depending on if the separator between words should be included in the 7 characters or not.
This is how to add row break to HTML text:
/// <summary>
/// Inserts "&lt;br/&gt;" row breaks into a text so that no row is longer than
/// <paramref name="_MaxRowLength"/> characters, breaking at spaces where possible.
/// </summary>
/// <param name="_SourceText">The text to break; null or empty text is returned unchanged.</param>
/// <param name="_MaxRowLength">Maximum number of characters per row; must be positive.</param>
/// <returns>The text with row breaks inserted. The space at a break position is replaced by the break.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="_MaxRowLength"/> is zero or negative.</exception>
string SplitLongText(string _SourceText, int _MaxRowLength)
{
    if (_MaxRowLength <= 0)
    {
        // A non-positive width can never make progress through the text.
        throw new ArgumentOutOfRangeException("_MaxRowLength", "The maximum row length must be positive.");
    }

    if (string.IsNullOrEmpty(_SourceText) || _SourceText.Length <= _MaxRowLength)
    {
        return _SourceText;
    }

    var _RowBreakText = new System.Text.StringBuilder(_SourceText.Length + 16);
    int _CurrentPlace = 0;

    while (_SourceText.Length - _CurrentPlace > _MaxRowLength)
    {
        // Look for the last space among the next _MaxRowLength + 1 characters: a space right
        // after a full-width row is still a valid break position.
        int _LastSpace = _SourceText.LastIndexOf(' ', _CurrentPlace + _MaxRowLength, _MaxRowLength + 1);
        if (_LastSpace > _CurrentPlace)
        {
            _RowBreakText.Append(_SourceText, _CurrentPlace, _LastSpace - _CurrentPlace).Append("<br/>");
            // Continue after the space so the next row is measured from the break itself;
            // advancing by a fixed _MaxRowLength let rows grow beyond the limit.
            _CurrentPlace = _LastSpace + 1;
        }
        else
        {
            // No usable space: the word is longer than a row, so cut it.
            _RowBreakText.Append(_SourceText, _CurrentPlace, _MaxRowLength).Append("<br/>");
            _CurrentPlace += _MaxRowLength;
        }
    }

    _RowBreakText.Append(_SourceText, _CurrentPlace, _SourceText.Length - _CurrentPlace);
    return _RowBreakText.ToString();
}
2021
Look at this extension method; it uses recursion:
/// <summary>
/// Shortens <paramref name="str"/> to at most <paramref name="maxLength"/> characters by
/// dropping whole trailing words.
/// </summary>
/// <param name="str">The text to shorten.</param>
/// <param name="maxLength">The maximum length of the result.</param>
/// <returns>
/// The longest leading run of whole words that fits. When even the first word is too long,
/// that word is cut at <paramref name="maxLength"/>.
/// </returns>
public static string SubstringDontBreakWords(this string str, int maxLength)
{
    while (str.Length > maxLength)
    {
        int lastSpace = str.LastIndexOf(' ');
        if (lastSpace < 0)
        {
            // No word boundary left: Substring(0, -1) used to throw here, so cut the word instead.
            return str.Substring(0, maxLength);
        }

        // Drop the last word and any whitespace surrounding what remains.
        str = str.Substring(0, lastSpace).Trim();
    }

    return str;
}
Use it like this
string text = "Hello friends";
string shortened = text.SubstringDontBreakWords(10); // "Hello"
Are there any ready-made functions for expanding a C# regex-style character set?
For example, expand("a-z1") would return a string containing all the characters a to z, followed by the number 1.
Here's what I've got so far:
/// <summary>
/// Expands "x-y" ranges of a regex-style character set into the characters they cover,
/// e.g. "a-fA-F0-9" becomes "abcdefABCDEF0123456789". Characters outside ranges are copied as-is.
/// </summary>
/// <param name="set">The character set, without surrounding brackets.</param>
/// <returns>The expanded characters in the order they appear in the set.</returns>
public static string ExpandCharacterSet(string set)
{
    var expanded = new StringBuilder();
    int cursor = 0;

    while (cursor < set.Length - 1)
    {
        // Search from cursor + 1 so a dash at the cursor itself is treated as a literal.
        int dashAt = set.IndexOf('-', cursor + 1);
        bool isRangeDash = dashAt > 0 && dashAt < set.Length - 1;
        if (!isRangeDash)
        {
            break;
        }

        // Literals between the cursor and the range's first endpoint.
        expanded.Append(set.Substring(cursor, dashAt - cursor - 1));

        char low = set[dashAt - 1];
        char high = set[dashAt + 1];
        for (var ch = low; ch <= high; ++ch)
        {
            expanded.Append(ch);
        }

        cursor = dashAt + 2;
    }

    // Whatever follows the last range is literal.
    expanded.Append(set.Substring(cursor));
    return expanded.ToString();
}
Is there anything I'm overlooking?
PS: Let's ignore negative character sets for now.
Thought my example was quite clear... let's try that again. This is what I want:
ExpandCharacterSet("a-fA-F0-9") == "abcdefABCDEF0123456789"
It took a bit of work to get this but here's what I was able to muster. Of course this is not going to be portable since I'm messing with internals. But it works well enough for simple test cases. It will accept any regex character class but will not work for negated classes. The range of values is way too broad without any restrictions. I don't know if it will be correct for all cases and it doesn't handle repetition at all but it's a start. At least you won't have to roll out your own parser. As of .NET Framework 4.0:
// Expands a regex character class into the characters it covers by driving non-public types
// of the System.Text.RegularExpressions assembly through reflection. The type, method and
// field names looked up below are implementation details of that assembly, so this only
// works on runtimes where they exist under exactly these names.
public static class RegexHelper
{
// Parses charClass (for example "[a-c1]") with the framework's own parser and returns the
// concatenation of all characters of every range the parser reports.
public static string ExpandCharClass(string charClass)
{
var regexParser = new RegexParser(CultureInfo.CurrentCulture);
regexParser.SetPattern(charClass);
var regexCharClass = regexParser.ScanCharClass(false);
int count = regexCharClass.RangeCount();
List<string> ranges = new List<string>();
// range 0 can be skipped
for (int i = 1; i < count; i++)
{
var range = regexCharClass.GetRangeAt(i);
ranges.Add(ExpandRange(range));
}
return String.Concat(ranges);
}
// Returns every character from range._first to range._last, both inclusive.
static string ExpandRange(SingleRange range)
{
char first = range._first;
char last = range._last;
return String.Concat(Enumerable.Range(first, last - first + 1).Select(i => (char)i));
}
// Reflection wrapper around the non-public System.Text.RegularExpressions.RegexParser type.
internal class RegexParser
{
static readonly Type RegexParserType;
static readonly ConstructorInfo RegexParser_Ctor;
static readonly MethodInfo RegexParser_SetPattern;
static readonly MethodInfo RegexParser_ScanCharClass;
// Resolves the wrapped type and its non-public instance members once.
static RegexParser()
{
RegexParserType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexParser");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexParser_Ctor = RegexParserType.GetConstructor(flags, null, new[] { typeof(CultureInfo) }, null);
RegexParser_SetPattern = RegexParserType.GetMethod("SetPattern", flags, null, new[] { typeof(String) }, null);
RegexParser_ScanCharClass = RegexParserType.GetMethod("ScanCharClass", flags, null, new[] { typeof(Boolean) }, null);
}
// The wrapped framework parser object.
private readonly object instance;
internal RegexParser(CultureInfo culture)
{
instance = RegexParser_Ctor.Invoke(new object[] { culture });
}
// Forwards to the wrapped parser's SetPattern(string).
internal void SetPattern(string pattern)
{
RegexParser_SetPattern.Invoke(instance, new object[] { pattern });
}
// Forwards to the wrapped parser's ScanCharClass(bool) and wraps the object it returns.
internal RegexCharClass ScanCharClass(bool caseInsensitive)
{
return new RegexCharClass(RegexParser_ScanCharClass.Invoke(instance, new object[] { caseInsensitive }));
}
}
// Reflection wrapper around the non-public System.Text.RegularExpressions.RegexCharClass type.
internal class RegexCharClass
{
static readonly Type RegexCharClassType;
static readonly MethodInfo RegexCharClass_RangeCount;
static readonly MethodInfo RegexCharClass_GetRangeAt;
// Resolves the wrapped type and its non-public instance methods once.
static RegexCharClass()
{
RegexCharClassType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexCharClass_RangeCount = RegexCharClassType.GetMethod("RangeCount", flags, null, new Type[] { }, null);
RegexCharClass_GetRangeAt = RegexCharClassType.GetMethod("GetRangeAt", flags, null, new[] { typeof(Int32) }, null);
}
// The wrapped framework character-class object.
private readonly object instance;
// Wraps regexCharClass; rejects null and objects of any other type.
internal RegexCharClass(object regexCharClass)
{
if (regexCharClass == null)
throw new ArgumentNullException("regexCharClass");
if (regexCharClass.GetType() != RegexCharClassType)
throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");
instance = regexCharClass;
}
// Forwards to the wrapped object's RangeCount().
internal int RangeCount()
{
return (int)RegexCharClass_RangeCount.Invoke(instance, new object[] { });
}
// Forwards to the wrapped object's GetRangeAt(int) and copies the result into a SingleRange.
internal SingleRange GetRangeAt(int i)
{
return new SingleRange(RegexCharClass_GetRangeAt.Invoke(instance, new object[] { i }));
}
}
// Copy of the _first/_last fields of the non-public RegexCharClass+SingleRange type.
internal struct SingleRange
{
static readonly Type RegexCharClassSingleRangeType;
static readonly FieldInfo SingleRange_first;
static readonly FieldInfo SingleRange_last;
// Resolves the wrapped nested type and its two non-public instance fields once.
static SingleRange()
{
RegexCharClassSingleRangeType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
SingleRange_first = RegexCharClassSingleRangeType.GetField("_first", flags);
SingleRange_last = RegexCharClassSingleRangeType.GetField("_last", flags);
}
// First and last character of the range, as read from the wrapped object.
internal char _first;
internal char _last;
// Reads both fields from singleRange; rejects null and objects of any other type.
internal SingleRange(object singleRange)
{
if (singleRange == null)
throw new ArgumentNullException("singleRange");
if (singleRange.GetType() != RegexCharClassSingleRangeType)
throw new ArgumentException("not an instance of a SingleRange object", "singleRange");
_first = (char)SingleRange_first.GetValue(singleRange);
_last = (char)SingleRange_last.GetValue(singleRange);
}
}
}
// usage:
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"
Seems like a pretty unusual requirement, but since there are only about 96 characters that you can match (unless you include high chars), you might as well just test your regular expression against all of them, and output the matches:
// Returns, in order, every candidate character that the regular expression input_re matches
// when tested on its own.
public static string expando(string input_re) {
    // add more chars in candidates as needed, such as ,.?/|=+_-éñ etc.
    const string candidates = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    Regex matcher = new Regex(input_re);
    string matched = "";
    foreach (char candidate in candidates) {
        if (!matcher.IsMatch(candidate.ToString())) {
            continue;
        }
        matched += candidate;
    }
    return matched;
}
By using an actual regex to determine your character class, you can expand whatever regular expression you want, [^A-B]|[0123a-cg-h], for example.
Something like this?
var input = "a-fA-F0-9!";
// Each match is either a three-character range "x-y" or a single literal character.
var matches = Regex.Matches(input, @".-.|.");
var list = new StringBuilder();
foreach (Match m in matches)
{
    var value = m.Value;
    if (value.Length == 1)
        list.Append(value);
    else
    {
        if (value[2] < value[0]) throw new ArgumentException("invalid format"); // or switch, if you want.
        for (char c = value[0]; c <= value[2]; c++)
            list.Append(c);
    }
}
Console.WriteLine(list);
Output:
abcdefABCDEF0123456789!
The moral, of course, is to solve your regex problems with more regex!
Here's a version with support for escape characters. It all depends how robust you want it to be... for example, I don't do anything special here to handle surrogates, so that probably won't work. Also, if you're trying to match the performance of a current regex engine exactly you'll need to know exactly what all the parameters are, which would be a fairly big job.
// Expands a character set that may contain escaped characters and prints the result.
void Main()
{
    //these are all equivalent:
    var input = @"\x41-\0x46\u41";
    var input2 = @"\65-\70\65";
    var input3 = "A-FA";
    // match hex as \0x123 or \x123 or \u123, or decimal \412, or the escapes \n\t\r, or any character
    var charRegex = @"(\\(0?x|u)[0-9a-fA-F]+|\\[0-9]+|\\[ntr]|.)";
    // A match is either "<char>-<char>" (a range) or a single <char>.
    var matches = Regex.Matches(input, charRegex + "-" + charRegex + "|" + charRegex);
    var list = new StringBuilder();
    foreach (Match m in matches)
    {
        var dashIndex = m.Value.IndexOf('-', 1); //don't look at 0 (in case it's a dash)
        if (dashIndex > 0) // this means we have two items: a range
        {
            var charLeft = Decode(m.Value.Substring(0, dashIndex));
            var charRight = Decode(m.Value.Substring(dashIndex + 1));
            if (charRight < charLeft) throw new ArgumentException("invalid format (left bigger than right)"); // or switch, if you want.
            for (char c = charLeft; c <= charRight; c++)
                list.Append(c);
        }
        else // just one item
        {
            list.Append(Decode(m.Value));
        }
    }
    Console.WriteLine(list);
}
// Turns one token of the character set into the character it denotes: a plain character,
// one of the escapes \n \t \r, a hexadecimal code (\x.., \u.., \0x..) or a decimal code (\65).
char Decode(string s)
{
    if (s.Length == 1)
    {
        return s[0];
    }

    // From here on s[0] == '\', because of the regex that produced the token.
    char marker = s[1];

    if (s.Length == 2)
    {
        // incomplete; add more as wished
        if (marker == 'n') return '\n';
        if (marker == 't') return '\t';
        if (marker == 'r') return '\r';
    }

    bool shortHex = marker == 'u' || marker == 'x';
    if (shortHex)
    {
        return (char)Convert.ToUInt16(s.Substring(2), 16);
    }

    bool longHex = s.Length > 2 && marker == '0' && s[2] == 'x';
    if (longHex)
    {
        return (char)Convert.ToUInt16(s.Substring(3), 16);
    }

    // Decimal code; will fail from here if invalid escape (e.g. \g)
    return (char)Convert.ToUInt16(s.Substring(1));
}
// Every non-control UTF-16 code unit; the universe that a negated set is subtracted from.
private static readonly IEnumerable<char> CharacterSet = Enumerable.Range(0, char.MaxValue + 1).Select(i => (char)i).Where(c => !char.IsControl(c));

/// <summary>
/// Expands a regex-style character set such as "a-fA-F0-9" or "[^a-c]" into the characters it covers.
/// </summary>
/// <param name="set">The character set, with or without surrounding brackets; a leading '^' negates it.</param>
/// <returns>
/// The expanded characters. For a negated set, all non-control characters that are not in the set.
/// Null, "" and "[]" yield an empty string.
/// </returns>
public static string ExpandCharacterSet(string set)
{
    if (string.IsNullOrEmpty(set))
        return "";

    if (set.Length >= 2 && set[0] == '[' && set[set.Length - 1] == ']')
        set = set.Substring(1, set.Length - 2);

    // "[]" leaves an empty set behind, so check the length before indexing (set[0] used to throw).
    bool invertSet = set.Length > 0 && set[0] == '^';
    if (invertSet)
        set = set.Substring(1);

    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        // Search from start + 1 so a dash at the current position is treated as a literal.
        int dash = set.IndexOf('-', start + 1);
        if (dash <= 0 || dash >= set.Length - 1)
            break;

        // Literals before the range, then the range itself.
        sb.Append(set.Substring(start, dash - start - 1));
        char a = set[dash - 1];
        char z = set[dash + 1];
        for (var i = a; i <= z; ++i)
            sb.Append(i);
        start = dash + 2;
    }
    sb.Append(set.Substring(start));

    if (!invertSet) return sb.ToString();

    var A = new HashSet<char>(CharacterSet);
    var B = new HashSet<char>(sb.ToString());
    A.ExceptWith(B);
    return new string(A.ToArray());
}