I'm trying to create permutations of the word "password". With the code below I have managed to create all upper-case/lower-case permutations of the word. I also need to include permutations that contain special characters, for example "p#ssword", where "a" is replaced with "#" and "o" is replaced with "0".
Any idea how I can expand my implementation?
/// <summary>
/// Writes every upper/lower-case variant of <paramref name="pwd"/> to the console, one per line.
/// </summary>
/// <remarks>
/// The word is lower-cased first, then each bit mask from 0 to 2^n - 1 selects the positions
/// that are upper-cased (bit i controls character i). Casing is applied by position, so
/// repeated letters such as the two 's' in "password" are handled independently.
/// Characters without a distinct upper-case form produce repeated lines.
/// </remarks>
/// <param name="pwd">The word to expand; at most 30 characters so that 2^n fits in an int.</param>
/// <exception cref="ArgumentNullException"><paramref name="pwd"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="pwd"/> is longer than 30 characters.</exception>
public static void WritePermutations(string pwd)
{
    if (pwd == null)
    {
        throw new ArgumentNullException("pwd");
    }
    if (pwd.Length > 30)
    {
        // 1 << 31 is negative and 1 << 32 wraps to 1, so longer words cannot be enumerated.
        throw new ArgumentException("At most 30 characters are supported.", "pwd");
    }

    char[] lower = pwd.ToLowerInvariant().ToCharArray();
    int variantCount = 1 << lower.Length;
    var variant = new char[lower.Length];
    for (int mask = 0; mask < variantCount; mask++)
    {
        for (int i = 0; i < lower.Length; i++)
        {
            bool upper = (mask & (1 << i)) != 0;
            variant[i] = upper ? char.ToUpperInvariant(lower[i]) : lower[i];
        }
        Console.WriteLine(new string(variant));
    }
}
/// <summary>
/// Replaces the first ordinal occurrence of <paramref name="Find"/> in <paramref name="Source"/>
/// with <paramref name="Replace"/>.
/// </summary>
/// <returns>
/// The string with the first occurrence replaced, or <paramref name="Source"/> unchanged when
/// <paramref name="Find"/> does not occur in it.
/// </returns>
public static string ReplaceFirstOccurrence(string Source, string Find, string Replace)
{
    int place = Source.IndexOf(Find, StringComparison.Ordinal);
    if (place < 0)
    {
        // Nothing to replace; Remove(-1, ...) would throw.
        return Source;
    }
    return Source.Remove(place, Find.Length).Insert(place, Replace);
}
How about starting off with these declarations and functions:
// Groups of characters that may stand in for one another (e.g. 'a' <-> '#').
List<HashSet<char>> _charCombinations = new List<HashSet<char>> {
    new HashSet<char> {'a','#'},
    new HashSet<char> {'o', '0'},
};
/// <summary>
/// Returns every character that may replace <paramref name="c"/>: all members of each
/// substitution group containing it, its upper- and lower-case forms when it is a letter,
/// and the character itself.
/// </summary>
HashSet<char> GetAlternatives(char c)
{
    var result = new HashSet<char>();
    foreach (var group in _charCombinations)
    {
        if (group.Contains(c))
        {
            result.UnionWith(group);
        }
    }
    if (char.IsLetter(c))
    {
        // Invariant casing keeps the result independent of the current culture.
        result.Add(char.ToUpperInvariant(c));
        result.Add(char.ToLowerInvariant(c));
    }
    result.Add(c);
    return result;
}
/// <summary>
/// Lazily enumerates every variant of the tail of <paramref name="s"/> that begins at the
/// 1-based position <paramref name="start"/>, replacing each character with each of its
/// alternatives from GetAlternatives.
/// </summary>
/// <param name="s">The word to transform. An empty word yields no variants.</param>
/// <param name="start">1-based index of the first character to transform; pass 1 for the whole word.</param>
IEnumerable<string> GetTransformations(string s, int start)
{
    if (s.Length == 0)
    {
        yield break;
    }

    char c = s[start - 1];
    foreach (var c2 in GetAlternatives(c))
    {
        if (start == s.Length)
        {
            // Last character: each alternative is a complete tail on its own.
            yield return c2.ToString();
        }
        else
        {
            // foreach disposes the nested enumerator, which the manual MoveNext loop did not.
            foreach (var tail in GetTransformations(s, start + 1))
            {
                yield return c2 + tail;
            }
        }
    }
}
You can then use them like this:
// Materialise every variant of "password" (positions are 1-based, hence the 1),
// then order the list by ordinal character value.
var result = new List<string>(GetTransformations("password", 1));
result.Sort(string.CompareOrdinal);
This produces 576 strings (too long to list here), which is exactly what you'd expect for an 8-letter word where 6 of the chars have 2 possibilities, and the other 2 have 3 possibilities, i.e. 2x2x2x2x2x2x3x3
Related
I'm trying to develop a method that will match all strings between two strings:
I've tried this but it returns only the first match:
/// <summary>
/// Returns the text between the first occurrence of <paramref name="start"/> and the next
/// occurrence of <paramref name="end"/> after it.
/// </summary>
/// <returns>The enclosed text, or an empty string when either marker is missing.</returns>
string ExtractString(string s, string start, string end)
{
    int markerIndex = s.IndexOf(start, StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        return string.Empty;
    }

    int startIndex = markerIndex + start.Length;
    int endIndex = s.IndexOf(end, startIndex, StringComparison.Ordinal);
    if (endIndex < 0)
    {
        return string.Empty;
    }

    return s.Substring(startIndex, endIndex - startIndex);
}
Let's suppose we have this string
String Text = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2"
I would like a c# function doing the following :
// Skeleton of the function being asked for: it should collect every substring of Text that
// lies between a Start marker and the following End marker.
// NOTE: the dotted lines are a placeholder for the missing implementation, so this does not compile as written.
public List<string> ExtractFromString(String Text,String Start, String End)
{
List<string> Matched = new List<string>();
.
.
.
return Matched;
}
// Example of use
ExtractFromString("A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2","A1","A2")
// Will return :
// FIRSTSTRING
// SECONDSTRING
// THIRDSTRING
Thank you for your help !
/// <summary>
/// Collects every substring of <paramref name="body"/> enclosed by <paramref name="start"/>
/// and the next <paramref name="end"/> after it, scanning left to right without overlap.
/// </summary>
/// <returns>
/// The enclosed substrings in order of appearance. A start marker with no end marker after
/// it ends the scan without adding anything.
/// </returns>
/// <exception cref="ArgumentException">Both markers are empty (the scan could never advance).</exception>
private static List<string> ExtractFromBody(string body, string start, string end)
{
    if (start.Length == 0 && end.Length == 0)
    {
        throw new ArgumentException("At least one of the markers must be non-empty.");
    }

    var matched = new List<string>();
    int searchFrom = 0;
    while (true)
    {
        int indexStart = body.IndexOf(start, searchFrom, StringComparison.Ordinal);
        if (indexStart < 0)
        {
            break;
        }

        // Look for the end marker only after the start marker, so it cannot match inside it.
        int contentStart = indexStart + start.Length;
        int indexEnd = body.IndexOf(end, contentStart, StringComparison.Ordinal);
        if (indexEnd < 0)
        {
            break;
        }

        matched.Add(body.Substring(contentStart, indexEnd - contentStart));
        searchFrom = indexEnd + end.Length;
    }
    return matched;
}
Here is a solution using RegEx. Don't forget to include the following using statement.
using System.Text.RegularExpressions
It will correctly return only text between the start and end strings given.
Will not be returned:
akslakhflkshdflhksdf
Will be returned:
FIRSTSTRING
SECONDSTRING
THIRDSTRING
It uses the regular expression pattern [start string].+?[end string]
The start and end strings are escaped in case they contain regular expression special characters.
/// <summary>
/// Returns every substring of <paramref name="source"/> that lies between
/// <paramref name="start"/> and the nearest following <paramref name="end"/>.
/// </summary>
/// <remarks>
/// Both markers are regex-escaped. The captured text is matched lazily with <c>.*?</c> in
/// single-line mode, so empty content ("A1A2") and content spanning line breaks are
/// returned correctly.
/// </remarks>
private static List<string> ExtractFromString(string source, string start, string end)
{
    string pattern = Regex.Escape(start) + "(.*?)" + Regex.Escape(end);
    var results = new List<string>();
    foreach (Match m in Regex.Matches(source, pattern, RegexOptions.Singleline))
    {
        results.Add(m.Groups[1].Value);
    }
    return results;
}
You could make that into an extension method of String like this:
/// <summary>String helpers for extracting delimited substrings.</summary>
public static class StringExtensionMethods
{
    /// <summary>
    /// Returns every substring of <paramref name="source"/> that lies between
    /// <paramref name="start"/> and the nearest following <paramref name="end"/>.
    /// </summary>
    /// <remarks>
    /// Both markers are regex-escaped. The captured text is matched lazily with <c>.*?</c> in
    /// single-line mode, so empty content ("A1A2") and content spanning line breaks are
    /// returned correctly.
    /// </remarks>
    public static List<string> EverythingBetween(this string source, string start, string end)
    {
        string pattern = Regex.Escape(start) + "(.*?)" + Regex.Escape(end);
        var results = new List<string>();
        foreach (Match m in Regex.Matches(source, pattern, RegexOptions.Singleline))
        {
            results.Add(m.Groups[1].Value);
        }
        return results;
    }
}
Usage:
// Markers that delimit the wanted substrings.
var start = "A1";
var end = "A2";
// Sample input containing three delimited blocks and one stretch of undelimited text.
var source = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2";
var results = source.EverythingBetween(start, end);
// Splits on both markers and drops empty pieces. Every non-empty segment is returned, so text
// lying after an "A2" and before the next "A1" is part of the result as well.
text.Split(new[] {"A1", "A2"}, StringSplitOptions.RemoveEmptyEntries);
You can split the string into an array using the start identifier in following code:
String str = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2";
// Use the string[] overload: Split(string) is not available on .NET Framework.
String[] arr = str.Split(new[] { "A1" }, StringSplitOptions.None);
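Then iterate through your array and remove the trailing A2 from each string. You'll also need to discard the first array element, as it will be empty assuming the string starts with A1. Note that an element may contain extra text after its A2 (here the second one ends with "A2akslakhflkshdflhksdf"), so cut at the position of A2 rather than always removing the last 2 characters.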
Code is untested, currently on a mobile
This is a generic solution, and I believe more readable code. Not tested, so beware.
/// <summary>
/// Splits <paramref name="source"/> into blocks that begin at an element matching
/// <paramref name="startPredicate"/> and finish at the next element matching
/// <paramref name="endPredicate"/>.
/// </summary>
/// <param name="source">The sequence to split.</param>
/// <param name="startPredicate">Identifies a start delimiter; a new start discards an unfinished block.</param>
/// <param name="endPredicate">Identifies an end delimiter; an end with no open block is ignored.</param>
/// <param name="includeDelimiter">
/// When true each block contains its delimiters; when false the first and last elements are
/// stripped (a block made of a single element that is both start and end becomes empty).
/// </param>
/// <returns>The blocks, produced lazily in order of appearance.</returns>
public static IEnumerable<IList<T>> SplitBy<T>(this IEnumerable<T> source,
    Func<T, bool> startPredicate,
    Func<T, bool> endPredicate,
    bool includeDelimiter)
{
    var current = new List<T>();
    foreach (var item in source)
    {
        if (startPredicate(item))
        {
            // A start delimiter always opens a fresh block.
            current = new List<T> { item };
        }
        else if (current.Count > 0)
        {
            current.Add(item);
        }

        // Only close a block that was actually opened; a stray end delimiter is skipped.
        if (endPredicate(item) && current.Count > 0)
        {
            if (includeDelimiter)
            {
                yield return current;
            }
            else
            {
                // Max(0, ...) covers the one-element block, where Count - 2 would be negative.
                yield return current.GetRange(1, Math.Max(0, current.Count - 2));
            }
            current = new List<T>();
        }
    }
}
In your case you can call,
var text = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2";
// SplitBy works on a sequence of tokens, and a string is only a sequence of chars, so cut the
// text into tokens first. The capturing group makes Regex.Split keep "A1"/"A2" as tokens.
var tokens = System.Text.RegularExpressions.Regex.Split(text, "(A1|A2)");
var splits = tokens.SplitBy(x => x == "A1", x => x == "A2", false);
This is not the most efficient approach when you do not want the delimiters included in the result (as in your case), but it is efficient for the opposite case. To speed up your case, you can call GetEnumerator directly and make use of MoveNext.
I am trying to code the following example.
Input ABCDEFGHIJKLMNOPQRSTUVWXYZ
Output ZYXWVUTSRQPONMLKJIHGFEDCBA
If the user types A, the output is Z, which is 25 letters further along the alphabet. For B the output is Y, which is 23 letters along, so the distance drops by 2 each time until M, which is only 1 letter away from N; after that the pattern mirrors itself back up to 25. I am guessing a while loop will be needed.
Any suggestion on how to approach this?
Capital ASCII characters range according to the ASCII-table from 65 (0x41, 'A') to 90 (0x5A, 'Z').
This is the algorithm:
// inputChar holds the upper-case letter typed by the user
char inputChar = getCharFromUser();
// distance from 'A' (code 65): 0 for 'A', 1 for 'B', ...
int inputVal = inputChar - 'A';
// mirror from 'Z' (code 90): e.g. 90 - 1 = 89 = 'Y'
char outputChar = (char)('Z' - inputVal);
outputCharToUser(outputChar);
And this is how you might implement it in C#:
// Echoes the mirrored letter for every key pressed (A<->Z, B<->Y, ...) until Escape is hit.
while (true)
{
    var key = Console.ReadKey(intercept: true);
    if (key.Key == ConsoleKey.Escape)
    {
        break;
    }

    var inputChar = char.ToUpperInvariant(key.KeyChar);
    if (inputChar < 'A' || inputChar > 'Z')
    {
        // The mirror formula is only meaningful for A-Z; ignore every other key.
        continue;
    }

    // 'Z' - inputChar is the distance from the end of the alphabet; add it to 'A'.
    var outputChar = (char)('Z' - inputChar + 'A');
    Console.Write("{0}={1} ", inputChar, outputChar);
}
You could use two dictionaries which enable to lookup the char from index and vice-versa:
// One shared alphabet feeds both lookups.
const string alphabet = "abcdefghijklmnopqrstuvwxyz";
// letter -> zero-based position in the alphabet
var indexLookup = Enumerable.Range(0, alphabet.Length)
    .ToDictionary(i => alphabet[i], i => i);
// zero-based position -> letter
var charLookup = Enumerable.Range(0, alphabet.Length)
    .ToDictionary(i => i, i => alphabet[i]);
Now it's simple, the essential part is charLookup[25 - indexOfChar]:
string userInput = "B";
// Remember the casing, then look the letter up in lower case.
bool isUpper = char.IsUpper(userInput[0]);
char inputChar = Char.ToLowerInvariant(userInput[0]);
int indexOfChar;
// TryGetValue does the existence check and the read in a single lookup.
if (indexLookup.TryGetValue(inputChar, out indexOfChar))
{
    char oppositeChar = charLookup[25 - indexOfChar];
    string result = isUpper ? Char.ToUpperInvariant(oppositeChar).ToString() : oppositeChar.ToString();
    Console.Write(result); // Y
}
Actually you don't need two dictionaries but only one since the string can already be used to lookup a char by index. Here is a class which provides the logic:
/// <summary>
/// Maps a character to the one at the mirrored position of an alphabet
/// (first <-> last, second <-> second to last, ...).
/// </summary>
public class CharSwap
{
    private readonly string alphabet;
    // character -> zero-based position within alphabet
    private readonly Dictionary<char, int> indexLookup;

    /// <summary>Creates a swapper over the lower-case letters a-z.</summary>
    public CharSwap() : this("abcdefghijklmnopqrstuvwxyz") { }

    /// <summary>Creates a swapper over a custom alphabet.</summary>
    /// <param name="alphabet">The ordered characters to mirror; each must occur only once.</param>
    /// <exception cref="ArgumentNullException"><paramref name="alphabet"/> is null.</exception>
    public CharSwap(string alphabet)
    {
        if (alphabet == null) throw new ArgumentNullException("alphabet");
        this.alphabet = alphabet;
        indexLookup = alphabet.Select((chr, index) => new { chr, index }).ToDictionary(x => x.chr, x => x.index);
    }

    /// <summary>
    /// Returns the mirrored character for <paramref name="input"/>, preserving upper case,
    /// or null when its lower-case form is not part of the alphabet.
    /// </summary>
    public char? OppositeChar(char input)
    {
        int indexOfChar;
        // Single lookup instead of ContainsKey followed by the indexer.
        if (!indexLookup.TryGetValue(Char.ToLowerInvariant(input), out indexOfChar))
        {
            return null;
        }

        char opposite = alphabet[alphabet.Length - 1 - indexOfChar];
        return Char.IsUpper(input) ? Char.ToUpperInvariant(opposite) : opposite;
    }
}
Test:
// Mirror 'B' within the default a-z alphabet and print the result.
var cw = new CharSwap();
char? oppositeChar = cw.OppositeChar('B');
Console.Write(oppositeChar);
char input = 'B';
string Range = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
// Position counted from the end of the alphabet instead of from the start.
int mirrored = Range.Length - 1 - Range.IndexOf(input);
char result = Range[mirrored]; //Y
or maybe another approach
char input = 'B';
string Range = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
// Take the letter at the same position of the reversed alphabet.
int position = Range.IndexOf(input);
char result = Range.Reverse().ElementAt(position); //Y
How about something like this?
// Pseudo-code sketch, not valid C# as written (see the converted version that follows).
// alphabet and mapsTo are parallel arrays: mapsTo[i] is the replacement for alphabet[i].
char[] alphabet = {'A','B', 'C'} // and so on
char[] mapsTo = {'Z', 'Y', 'X'} // and so on, excluded for brevity
// Scans alphabet for input and returns the entry of mapsTo found at the same position.
public function string changeLetter(char input)
{
int i = 0;
foreach (char c in alphabet) {
if (c == input) {
return mapsTo[i];
}
i++;
}
// Reached only when input does not occur in alphabet.
return '';
}
Converted to c#:
// Parallel arrays: mapsTo[i] is the replacement for alphabet[i].
char[] alphabet = {'A','B', 'C'}; // and so on
char[] mapsTo = {'Z', 'Y', 'X'}; // and so on, excluded for brevity
/// <summary>
/// Returns the replacement for <paramref name="input"/> as a one-character string, or the
/// string form of default(char) when the character is not in the alphabet.
/// </summary>
public string changeLetter(char input)
{
    for (int position = 0; position < alphabet.Length; position++)
    {
        if (alphabet[position] == input)
        {
            return mapsTo[position].ToString();
        }
    }
    return default(char).ToString();
}
You would call this function like this (for instance):
/// <summary>
/// Prompts for a single key press and prints the character together with its replacement.
/// </summary>
public static void RunProgram()
{
    Console.WriteLine("Please type in character");
    // Declared locally: the variable was previously used without any declaration.
    char input = Console.ReadKey().KeyChar;
    Console.WriteLine("You typed in " + input + ". This results in: " + ChangeInput(input));
}
... where "ChangeInput" is the function defined earlier.
The easy way to solve this is to use a Dictionary<char, char>:
/// <summary>
/// Atbash substitution: each letter is replaced by the letter at the mirrored position of
/// the alphabet (a <-> z, b <-> y, ...).
/// </summary>
public class Atbash {
    const string source = "abcdefghijklmnopqrstuvwxyz";
    // lower-case letter -> mirrored lower-case letter
    static readonly Dictionary<char, char> Converter = BuildConverter();

    // Pairs every letter with the one at the same distance from the other end of the alphabet.
    static Dictionary<char, char> BuildConverter()
    {
        var map = new Dictionary<char, char>(source.Length);
        for (int i = 0; i < source.Length; i++) {
            map[source[i]] = source[source.Length - 1 - i];
        }
        return map;
    }

    /// <summary>Returns the mirrored letter for <paramref name="input"/>, preserving its case.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="input"/> is not a letter a-z or A-Z.</exception>
    public static char Convert(char input)
    {
        bool isUpper = char.IsUpper(input);
        char output;
        if (Converter.TryGetValue(char.ToLowerInvariant(input), out output)) {
            return isUpper ? char.ToUpperInvariant(output) : output;
        }
        // The three-argument overload keeps the text as the message; the single-string
        // overload would treat it as the parameter name.
        throw new ArgumentOutOfRangeException("input", input, "Input char is unknown");
    }
}
Why Atbash? read about it here.
I'm trying to develop a method that will match all strings between two strings:
I've tried this but it returns only the first match:
/// <summary>
/// Returns the text between the first occurrence of <paramref name="start"/> and the next
/// occurrence of <paramref name="end"/> after it.
/// </summary>
/// <returns>The enclosed text, or an empty string when either marker is missing.</returns>
string ExtractString(string s, string start, string end)
{
    int markerIndex = s.IndexOf(start, StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        return string.Empty;
    }

    int startIndex = markerIndex + start.Length;
    int endIndex = s.IndexOf(end, startIndex, StringComparison.Ordinal);
    if (endIndex < 0)
    {
        return string.Empty;
    }

    return s.Substring(startIndex, endIndex - startIndex);
}
Let's suppose we have this string
String Text = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2"
I would like a c# function doing the following :
// Skeleton of the function being asked for: it should collect every substring of Text that
// lies between a Start marker and the following End marker.
// NOTE: the dotted lines are a placeholder for the missing implementation, so this does not compile as written.
public List<string> ExtractFromString(String Text,String Start, String End)
{
List<string> Matched = new List<string>();
.
.
.
return Matched;
}
// Example of use
ExtractFromString("A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2","A1","A2")
// Will return :
// FIRSTSTRING
// SECONDSTRING
// THIRDSTRING
Thank you for your help !
/// <summary>
/// Collects every substring of <paramref name="body"/> enclosed by <paramref name="start"/>
/// and the next <paramref name="end"/> after it, scanning left to right without overlap.
/// </summary>
/// <returns>
/// The enclosed substrings in order of appearance. A start marker with no end marker after
/// it ends the scan without adding anything.
/// </returns>
/// <exception cref="ArgumentException">Both markers are empty (the scan could never advance).</exception>
private static List<string> ExtractFromBody(string body, string start, string end)
{
    if (start.Length == 0 && end.Length == 0)
    {
        throw new ArgumentException("At least one of the markers must be non-empty.");
    }

    var matched = new List<string>();
    int searchFrom = 0;
    while (true)
    {
        int indexStart = body.IndexOf(start, searchFrom, StringComparison.Ordinal);
        if (indexStart < 0)
        {
            break;
        }

        // Look for the end marker only after the start marker, so it cannot match inside it.
        int contentStart = indexStart + start.Length;
        int indexEnd = body.IndexOf(end, contentStart, StringComparison.Ordinal);
        if (indexEnd < 0)
        {
            break;
        }

        matched.Add(body.Substring(contentStart, indexEnd - contentStart));
        searchFrom = indexEnd + end.Length;
    }
    return matched;
}
Here is a solution using RegEx. Don't forget to include the following using statement.
using System.Text.RegularExpressions
It will correctly return only text between the start and end strings given.
Will not be returned:
akslakhflkshdflhksdf
Will be returned:
FIRSTSTRING
SECONDSTRING
THIRDSTRING
It uses the regular expression pattern [start string].+?[end string]
The start and end strings are escaped in case they contain regular expression special characters.
/// <summary>
/// Returns every substring of <paramref name="source"/> that lies between
/// <paramref name="start"/> and the nearest following <paramref name="end"/>.
/// </summary>
/// <remarks>
/// Both markers are regex-escaped. The captured text is matched lazily with <c>.*?</c> in
/// single-line mode, so empty content ("A1A2") and content spanning line breaks are
/// returned correctly.
/// </remarks>
private static List<string> ExtractFromString(string source, string start, string end)
{
    string pattern = Regex.Escape(start) + "(.*?)" + Regex.Escape(end);
    var results = new List<string>();
    foreach (Match m in Regex.Matches(source, pattern, RegexOptions.Singleline))
    {
        results.Add(m.Groups[1].Value);
    }
    return results;
}
You could make that into an extension method of String like this:
/// <summary>String helpers for extracting delimited substrings.</summary>
public static class StringExtensionMethods
{
    /// <summary>
    /// Returns every substring of <paramref name="source"/> that lies between
    /// <paramref name="start"/> and the nearest following <paramref name="end"/>.
    /// </summary>
    /// <remarks>
    /// Both markers are regex-escaped. The captured text is matched lazily with <c>.*?</c> in
    /// single-line mode, so empty content ("A1A2") and content spanning line breaks are
    /// returned correctly.
    /// </remarks>
    public static List<string> EverythingBetween(this string source, string start, string end)
    {
        string pattern = Regex.Escape(start) + "(.*?)" + Regex.Escape(end);
        var results = new List<string>();
        foreach (Match m in Regex.Matches(source, pattern, RegexOptions.Singleline))
        {
            results.Add(m.Groups[1].Value);
        }
        return results;
    }
}
Usage:
// Markers that delimit the wanted substrings.
var start = "A1";
var end = "A2";
// Sample input containing three delimited blocks and one stretch of undelimited text.
var source = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2";
var results = source.EverythingBetween(start, end);
// Splits on both markers and drops empty pieces. Every non-empty segment is returned, so text
// lying after an "A2" and before the next "A1" is part of the result as well.
text.Split(new[] {"A1", "A2"}, StringSplitOptions.RemoveEmptyEntries);
You can split the string into an array using the start identifier in following code:
String str = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2";
// Use the string[] overload: Split(string) is not available on .NET Framework.
String[] arr = str.Split(new[] { "A1" }, StringSplitOptions.None);
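Then iterate through your array and remove the trailing A2 from each string. You'll also need to discard the first array element, as it will be empty assuming the string starts with A1. Note that an element may contain extra text after its A2 (here the second one ends with "A2akslakhflkshdflhksdf"), so cut at the position of A2 rather than always removing the last 2 characters.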
Code is untested, currently on a mobile
This is a generic solution, and I believe more readable code. Not tested, so beware.
/// <summary>
/// Splits <paramref name="source"/> into blocks that begin at an element matching
/// <paramref name="startPredicate"/> and finish at the next element matching
/// <paramref name="endPredicate"/>.
/// </summary>
/// <param name="source">The sequence to split.</param>
/// <param name="startPredicate">Identifies a start delimiter; a new start discards an unfinished block.</param>
/// <param name="endPredicate">Identifies an end delimiter; an end with no open block is ignored.</param>
/// <param name="includeDelimiter">
/// When true each block contains its delimiters; when false the first and last elements are
/// stripped (a block made of a single element that is both start and end becomes empty).
/// </param>
/// <returns>The blocks, produced lazily in order of appearance.</returns>
public static IEnumerable<IList<T>> SplitBy<T>(this IEnumerable<T> source,
    Func<T, bool> startPredicate,
    Func<T, bool> endPredicate,
    bool includeDelimiter)
{
    var current = new List<T>();
    foreach (var item in source)
    {
        if (startPredicate(item))
        {
            // A start delimiter always opens a fresh block.
            current = new List<T> { item };
        }
        else if (current.Count > 0)
        {
            current.Add(item);
        }

        // Only close a block that was actually opened; a stray end delimiter is skipped.
        if (endPredicate(item) && current.Count > 0)
        {
            if (includeDelimiter)
            {
                yield return current;
            }
            else
            {
                // Max(0, ...) covers the one-element block, where Count - 2 would be negative.
                yield return current.GetRange(1, Math.Max(0, current.Count - 2));
            }
            current = new List<T>();
        }
    }
}
In your case you can call,
var text = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2";
// SplitBy works on a sequence of tokens, and a string is only a sequence of chars, so cut the
// text into tokens first. The capturing group makes Regex.Split keep "A1"/"A2" as tokens.
var tokens = System.Text.RegularExpressions.Regex.Split(text, "(A1|A2)");
var splits = tokens.SplitBy(x => x == "A1", x => x == "A2", false);
This is not the most efficient approach when you do not want the delimiters included in the result (as in your case), but it is efficient for the opposite case. To speed up your case, you can call GetEnumerator directly and make use of MoveNext.
How can I find all indexes of a pattern in a string using c#?
For example I want to find all ## pattern indexes in a string like this 45##78$$#56$$JK01UU
string pattern = "##";
string sentence = "45##78$$#56$$J##K01UU";
IList<int> indeces = new List<int>();
// Escape the text so that regex metacharacters (e.g. "$$") are searched for literally.
foreach (Match match in Regex.Matches(sentence, Regex.Escape(pattern)))
{
    indeces.Add(match.Index);
}
indeces will contain 2 and 14.
Edited the code to make it a cleaner function.
/// <summary>
/// Lazily returns the start index of every non-overlapping ordinal occurrence of
/// <paramref name="pattern"/> in <paramref name="str"/>, from left to right.
/// </summary>
/// <remarks>An empty pattern yields no indexes (it would otherwise match at index 0 forever).</remarks>
public IEnumerable<int> FindAllIndexes(string str, string pattern)
{
    if (pattern.Length == 0)
    {
        yield break;
    }

    int searchFrom = 0;
    int index;
    while ((index = str.IndexOf(pattern, searchFrom, StringComparison.Ordinal)) != -1)
    {
        yield return index;
        // Continue behind the match so that occurrences never overlap.
        searchFrom = index + pattern.Length;
    }
}
// Sample input with a single "##" occurrence.
var str = "45##78$$#56$$JK01UU";
var pattern = "##";
IEnumerable<int> indexes = FindAllIndexes(str, pattern);
You can get all the indices of a pattern in a string by using a regex search like this.
string input = "45##78$$#56$$JK01UU", pattern = Regex.Escape("##");
Regex rx = new Regex(pattern);
var indices = new List<int>();
// Search the declared input; MatchCollection exposes Count, not Length.
var matches = rx.Matches(input);
for (int i = 0; i < matches.Count; i++)
{
    indices.Add(matches[i].Index);
}
Another one that tries to be efficient:
/// <summary>
/// Lazily returns every index at which <paramref name="search"/> occurs in
/// <paramref name="input"/>, including overlapping occurrences, using ordinal comparison.
/// </summary>
/// <remarks>
/// Compares in place rather than copying the remaining text on every step.
/// An empty <paramref name="search"/> yields no indexes.
/// </remarks>
public IEnumerable<int> FindPatternIndexes(string input, string search)
{
    if (search.Length == 0)
    {
        yield break;
    }

    // Last position at which the whole search text still fits.
    int lastStart = input.Length - search.Length;
    for (int i = 0; i <= lastStart; i++)
    {
        if (string.CompareOrdinal(input, i, search, 0, search.Length) == 0)
        {
            yield return i;
        }
    }
}
Tested. Worked. But somewhat dumb.
string foo = "45##78$$#56$$JK01UU";
// Report the index of every '#' that is directly followed by another '#'.
for (int pos = 1; pos < foo.Length; pos++)
{
    bool previousIsHash = foo[pos - 1] == '#';
    if (previousIsHash && foo[pos] == '#')
    {
        MessageBox.Show("Index: " + (pos - 1));
    }
}
Are there any ready-made functions for expanding a C# regex-style character set?
For example, expand("a-z1") would return a string containing all the characters a to z, followed by the number 1.
Here's what I've got so far:
/// <summary>
/// Expands regex-style ranges in <paramref name="set"/> (for example "a-f") into the
/// characters they cover and copies every other character through unchanged.
/// </summary>
/// <remarks>
/// A '-' at the very start or end of the set is kept as a literal. A range whose end
/// precedes its start contributes no characters. Negated sets are not supported.
/// </remarks>
/// <returns>The expanded characters, e.g. "a-fA-F0-9" gives "abcdefABCDEF0123456789".</returns>
public static string ExpandCharacterSet(string set)
{
    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        int dash = set.IndexOf('-', start + 1);
        if (dash <= 0 || dash >= set.Length - 1)
        {
            break;
        }

        // Literal characters that precede the range's first endpoint.
        sb.Append(set.Substring(start, dash - start - 1));

        char first = set[dash - 1];
        char last = set[dash + 1];
        // An int counter cannot wrap around when the range ends at char.MaxValue.
        for (int code = first; code <= last; code++)
        {
            sb.Append((char)code);
        }
        start = dash + 2;
    }
    sb.Append(set.Substring(start));
    return sb.ToString();
}
Is there anything I'm overlooking?
PS: Let's ignore negative character sets for now.
Thought my example was quite clear... let's try that again. This is what I want:
ExpandCharacterSet("a-fA-F0-9") == "abcdefABCDEF0123456789"
It took a bit of work to get this but here's what I was able to muster. Of course this is not going to be portable since I'm messing with internals. But it works well enough for simple test cases. It will accept any regex character class but will not work for negated classes. The range of values is way too broad without any restrictions. I don't know if it will be correct for all cases and it doesn't handle repetition at all but it's a start. At least you won't have to roll out your own parser. As of .NET Framework 4.0:
// Expands a regex character class into the characters it covers by driving the regex
// library's own non-public parser through reflection.
// NOTE(review): every type, method and field name looked up below is internal to the
// System.Text.RegularExpressions assembly; the surrounding text targets .NET Framework 4.0,
// so confirm that the names still exist on the runtime in use.
public static class RegexHelper
{
// Parses charClass (e.g. "[a-c1]") with the internal parser and returns the concatenated
// expansion of every range the parser reports, except the one at index 0.
public static string ExpandCharClass(string charClass)
{
var regexParser = new RegexParser(CultureInfo.CurrentCulture);
regexParser.SetPattern(charClass);
var regexCharClass = regexParser.ScanCharClass(false);
int count = regexCharClass.RangeCount();
List<string> ranges = new List<string>();
// range 0 can be skipped
for (int i = 1; i < count; i++)
{
var range = regexCharClass.GetRangeAt(i);
ranges.Add(ExpandRange(range));
}
return String.Concat(ranges);
}
// Returns every character from range._first to range._last, both inclusive, as one string.
static string ExpandRange(SingleRange range)
{
char first = range._first;
char last = range._last;
return String.Concat(Enumerable.Range(first, last - first + 1).Select(i => (char)i));
}
// Reflection wrapper around the non-public System.Text.RegularExpressions.RegexParser type.
internal class RegexParser
{
static readonly Type RegexParserType;
static readonly ConstructorInfo RegexParser_Ctor;
static readonly MethodInfo RegexParser_SetPattern;
static readonly MethodInfo RegexParser_ScanCharClass;
// Resolves the internal type and the members used below once, when the wrapper is first touched.
static RegexParser()
{
RegexParserType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexParser");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexParser_Ctor = RegexParserType.GetConstructor(flags, null, new[] { typeof(CultureInfo) }, null);
RegexParser_SetPattern = RegexParserType.GetMethod("SetPattern", flags, null, new[] { typeof(String) }, null);
RegexParser_ScanCharClass = RegexParserType.GetMethod("ScanCharClass", flags, null, new[] { typeof(Boolean) }, null);
}
// The wrapped internal parser object.
private readonly object instance;
// Creates the internal parser through its non-public constructor taking a CultureInfo.
internal RegexParser(CultureInfo culture)
{
instance = RegexParser_Ctor.Invoke(new object[] { culture });
}
// Forwards to the internal SetPattern(String) method.
internal void SetPattern(string pattern)
{
RegexParser_SetPattern.Invoke(instance, new object[] { pattern });
}
// Forwards to the internal ScanCharClass(Boolean) method and wraps the object it returns.
internal RegexCharClass ScanCharClass(bool caseInsensitive)
{
return new RegexCharClass(RegexParser_ScanCharClass.Invoke(instance, new object[] { caseInsensitive }));
}
}
// Reflection wrapper around the non-public System.Text.RegularExpressions.RegexCharClass type.
internal class RegexCharClass
{
static readonly Type RegexCharClassType;
static readonly MethodInfo RegexCharClass_RangeCount;
static readonly MethodInfo RegexCharClass_GetRangeAt;
// Resolves the internal type and its RangeCount / GetRangeAt methods.
static RegexCharClass()
{
RegexCharClassType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexCharClass_RangeCount = RegexCharClassType.GetMethod("RangeCount", flags, null, new Type[] { }, null);
RegexCharClass_GetRangeAt = RegexCharClassType.GetMethod("GetRangeAt", flags, null, new[] { typeof(Int32) }, null);
}
// The wrapped internal character-class object.
private readonly object instance;
// Wraps an existing internal object; rejects null and objects of any other type.
internal RegexCharClass(object regexCharClass)
{
if (regexCharClass == null)
throw new ArgumentNullException("regexCharClass");
if (regexCharClass.GetType() != RegexCharClassType)
throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");
instance = regexCharClass;
}
// Forwards to the internal RangeCount() method.
internal int RangeCount()
{
return (int)RegexCharClass_RangeCount.Invoke(instance, new object[] { });
}
// Forwards to the internal GetRangeAt(Int32) method and copies the result into a SingleRange.
internal SingleRange GetRangeAt(int i)
{
return new SingleRange(RegexCharClass_GetRangeAt.Invoke(instance, new object[] { i }));
}
}
// Copy of the _first/_last fields of the non-public RegexCharClass+SingleRange type.
internal struct SingleRange
{
static readonly Type RegexCharClassSingleRangeType;
static readonly FieldInfo SingleRange_first;
static readonly FieldInfo SingleRange_last;
// Resolves the nested internal type and its two fields.
static SingleRange()
{
RegexCharClassSingleRangeType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
SingleRange_first = RegexCharClassSingleRangeType.GetField("_first", flags);
SingleRange_last = RegexCharClassSingleRangeType.GetField("_last", flags);
}
// First and last character of the range; ExpandRange treats both as inclusive.
internal char _first;
internal char _last;
// Reads both fields out of the internal object; rejects null and objects of any other type.
internal SingleRange(object singleRange)
{
if (singleRange == null)
throw new ArgumentNullException("singleRange");
if (singleRange.GetType() != RegexCharClassSingleRangeType)
throw new ArgumentException("not an instance of a SingleRange object", "singleRange");
_first = (char)SingleRange_first.GetValue(singleRange);
_last = (char)SingleRange_last.GetValue(singleRange);
}
}
}
// usage:
// The verbatim string prefix is '@'; the backslash must reach the parser unescaped.
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"
Seems like a pretty unusual requirement, but since there are only about 96 characters that you can match (unless you include high chars), you might as well just test your regular expression against all of them, and output the matches:
/// <summary>
/// Returns, in candidate order, every candidate character that the regular expression
/// <paramref name="input_re"/> matches when tested as a one-character string.
/// </summary>
public static string expando(string input_re) {
    // extend the candidates as needed, e.g. with ,.?/|=+_-éñ etc.
    string s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    Regex exp = new Regex(input_re);
    char[] kept = new char[s.Length];
    int keptCount = 0;
    foreach (char candidate in s) {
        if (exp.IsMatch(candidate.ToString())) {
            kept[keptCount++] = candidate;
        }
    }
    return new string(kept, 0, keptCount);
}
By using an actual regex to determine your character class, you can expand whatever regular expression you want, [^A-B]|[0123a-cg-h], for example.
Something like this?
var input = "a-fA-F0-9!";
// Each match is either a three-character range "x-y" or a single literal character.
var matches = Regex.Matches(input, @".-.|.");
var list = new StringBuilder();
foreach (Match m in matches)
{
    var value = m.Value;
    if (value.Length == 1)
    {
        list.Append(value);
    }
    else
    {
        if (value[2] < value[0]) throw new ArgumentException("invalid format"); // or switch, if you want.
        // An int counter cannot wrap around when the range ends at char.MaxValue.
        for (int code = value[0]; code <= value[2]; code++)
        {
            list.Append((char)code);
        }
    }
}
Console.WriteLine(list);
Output:
abcdefABCDEF0123456789!
The moral, of course, is to solve your regex problems with more regex!
Here's a version with support for escape characters. It all depends how robust you want it to be... for example, I don't do anything special here to handle surrogates, so that probably won't work. Also, if you're trying to match the performance of a current regex engine exactly you'll need to know exactly what all the parameters are, which would be a fairly big job.
/// <summary>
/// Expands a character-set description that may contain escaped characters and writes the
/// resulting characters to the console.
/// </summary>
void Main()
{
    //these are all equivalent:
    var input = @"\x41-\0x46\u41";
    var input2 = @"\65-\70\65";
    var input3 = "A-FA";
    // match hex as \0x123 or \x123 or \u123, or decimal \412, or the escapes \n\t\r, or any character
    var charRegex = @"(\\(0?x|u)[0-9a-fA-F]+|\\[0-9]+|\\[ntr]|.)";
    // A match is either a range "item-item" or a single item.
    var matches = Regex.Matches(input, charRegex + "-" + charRegex + "|" + charRegex);
    var list = new StringBuilder();
    foreach (Match m in matches)
    {
        var dashIndex = m.Value.IndexOf('-', 1); //don't look at 0 (in case it's a dash)
        if (dashIndex > 0) // this means we have two items: a range
        {
            var charLeft = Decode(m.Value.Substring(0, dashIndex));
            var charRight = Decode(m.Value.Substring(dashIndex + 1));
            if (charRight < charLeft) throw new ArgumentException("invalid format (left bigger than right)"); // or switch, if you want.
            // An int counter cannot wrap around when the range ends at char.MaxValue.
            for (int code = charLeft; code <= charRight; code++)
            {
                list.Append((char)code);
            }
        }
        else // just one item
        {
            list.Append(Decode(m.Value));
        }
    }
    Console.WriteLine(list);
}
/// <summary>
/// Converts one item of a character set to the character it denotes. The item is either a
/// single literal character or a backslash escape: \n, \t, \r, hexadecimal (\x41, \u41,
/// \0x41) or decimal (\65).
/// </summary>
char Decode(string s)
{
    if (s.Length == 1)
    {
        return s[0];
    }

    // From here on s[0] is the backslash (guaranteed by the regex that produced s).
    char marker = s[1];
    if (s.Length == 2)
    {
        // incomplete; add more named escapes as wished
        if (marker == 'n') return '\n';
        if (marker == 't') return '\t';
        if (marker == 'r') return '\r';
    }

    bool shortHex = marker == 'u' || marker == 'x';
    if (shortHex)
    {
        return (char)Convert.ToUInt16(s.Substring(2), 16);
    }

    bool longHex = s.Length > 2 && marker == '0' && s[2] == 'x';
    if (longHex)
    {
        return (char)Convert.ToUInt16(s.Substring(3), 16);
    }

    // Decimal code; an invalid escape (e.g. \g) fails in the conversion below.
    return (char)Convert.ToUInt16(s.Substring(1));
}
// Every non-control character, in ascending order. Materialised once so that inverted
// sets do not re-run the 65,536-element query on each call.
private static readonly IEnumerable<char> CharacterSet =
    Enumerable.Range(0, char.MaxValue + 1).Select(i => (char)i).Where(c => !char.IsControl(c)).ToArray();

/// <summary>
/// Expands regex-style ranges in <paramref name="set"/> (for example "a-f") into the
/// characters they cover. Surrounding square brackets are optional, and a leading '^'
/// inverts the set against all non-control characters.
/// </summary>
/// <returns>
/// The expanded characters; for an inverted set, every non-control character that is not
/// in the expansion, in ascending order. Empty input and "[]" give an empty string.
/// </returns>
public static string ExpandCharacterSet(string set)
{
    if (set.Length == 0)
    {
        return "";
    }

    if (set[0] == '[' && set[set.Length - 1] == ']')
    {
        set = set.Substring(1, set.Length - 2);
    }

    bool invertSet = false;
    // "[]" leaves an empty set here, so check the length before looking at the first char.
    if (set.Length > 0 && set[0] == '^')
    {
        invertSet = true;
        set = set.Substring(1);
    }

    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        int dash = set.IndexOf('-', start + 1);
        if (dash <= 0 || dash >= set.Length - 1)
        {
            break;
        }

        // Literal characters that precede the range's first endpoint.
        sb.Append(set.Substring(start, dash - start - 1));

        char first = set[dash - 1];
        char last = set[dash + 1];
        // An int counter cannot wrap around when the range ends at char.MaxValue.
        for (int code = first; code <= last; code++)
        {
            sb.Append((char)code);
        }
        start = dash + 2;
    }
    sb.Append(set.Substring(start));

    if (!invertSet)
    {
        return sb.ToString();
    }

    var excluded = new HashSet<char>(sb.ToString());
    return new string(CharacterSet.Where(c => !excluded.Contains(c)).ToArray());
}