Algorithm for expanding a character set? - c#

Are there any ready-made functions for expanding a C# regex-style character set?
For example, expand("a-z1") would return a string containing all the characters a to z, followed by the number 1.
Here's what I've got so far:
/// <summary>
/// Expands a regex-style character set (without the surrounding brackets) into the
/// string of characters it denotes, e.g. "a-z1" becomes the letters a to z followed by 1.
/// </summary>
/// <param name="set">The set specification, for example "a-fA-F0-9".</param>
/// <returns>
/// The expanded characters, in the order they appear in <paramref name="set"/>.
/// A reversed range such as "z-a" contributes no characters; a leading or trailing
/// dash is kept as a literal dash.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is null.</exception>
public static string ExpandCharacterSet(string set)
{
    if (set == null)
        throw new ArgumentNullException("set");

    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        // Search from start + 1 so that a dash in first position is treated as a literal.
        int dash = set.IndexOf('-', start + 1);
        if (dash <= 0 || dash >= set.Length - 1)
            break;

        // Literal characters that precede the range's first endpoint.
        sb.Append(set, start, dash - start - 1);

        char first = set[dash - 1];
        char last = set[dash + 1];
        // Count with an int: a char counter wraps around after '\uffff' and would
        // never terminate when the range ends at char.MaxValue.
        for (int code = first; code <= last; ++code)
            sb.Append((char)code);

        start = dash + 2;
    }

    // Whatever is left after the last range is literal.
    sb.Append(set, start, set.Length - start);
    return sb.ToString();
}
Is there anything I'm overlooking?
PS: Let's ignore negative character sets for now.
Thought my example was quite clear... let's try that again. This is what I want:
ExpandCharacterSet("a-fA-F0-9") == "abcdefABCDEF0123456789"

It took a bit of work to get this but here's what I was able to muster. Of course this is not going to be portable since I'm messing with internals. But it works well enough for simple test cases. It will accept any regex character class but will not work for negated classes. The range of values is way too broad without any restrictions. I don't know if it will be correct for all cases and it doesn't handle repetition at all but it's a start. At least you won't have to roll out your own parser. As of .NET Framework 4.0:
// Expands a regex character class by driving the framework's own (non-public) regex
// parser through reflection. Every type and member name looked up below is a private
// implementation detail of System.Text.RegularExpressions.
// NOTE(review): the answer states this was written against .NET Framework 4.0; the
// reflected names may not exist on other runtimes, in which case the lookups return
// null and the static constructors / Invoke calls will fail — confirm before reuse.
public static class RegexHelper
{
// Parses charClass (e.g. "[a-f0-9]") with the internal parser and returns the
// concatenation of every character in each range it reports.
public static string ExpandCharClass(string charClass)
{
var regexParser = new RegexParser(CultureInfo.CurrentCulture);
regexParser.SetPattern(charClass);
var regexCharClass = regexParser.ScanCharClass(false);
int count = regexCharClass.RangeCount();
List<string> ranges = new List<string>();
// range 0 can be skipped
// NOTE(review): why range 0 is skippable is not visible here — confirm against the
// internal RegexCharClass implementation.
for (int i = 1; i < count; i++)
{
var range = regexCharClass.GetRangeAt(i);
ranges.Add(ExpandRange(range));
}
return String.Concat(ranges);
}
// Produces every character from range._first to range._last inclusive as one string.
static string ExpandRange(SingleRange range)
{
char first = range._first;
char last = range._last;
return String.Concat(Enumerable.Range(first, last - first + 1).Select(i => (char)i));
}
// Reflection wrapper around the internal System.Text.RegularExpressions.RegexParser.
internal class RegexParser
{
static readonly Type RegexParserType;
static readonly ConstructorInfo RegexParser_Ctor;
static readonly MethodInfo RegexParser_SetPattern;
static readonly MethodInfo RegexParser_ScanCharClass;
// Resolves the internal type and the non-public members used below, once per AppDomain.
static RegexParser()
{
RegexParserType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexParser");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexParser_Ctor = RegexParserType.GetConstructor(flags, null, new[] { typeof(CultureInfo) }, null);
RegexParser_SetPattern = RegexParserType.GetMethod("SetPattern", flags, null, new[] { typeof(String) }, null);
RegexParser_ScanCharClass = RegexParserType.GetMethod("ScanCharClass", flags, null, new[] { typeof(Boolean) }, null);
}
// The wrapped internal parser object.
private readonly object instance;
// Creates the internal parser through its non-public (CultureInfo) constructor.
internal RegexParser(CultureInfo culture)
{
instance = RegexParser_Ctor.Invoke(new object[] { culture });
}
// Forwards to the internal SetPattern(string).
internal void SetPattern(string pattern)
{
RegexParser_SetPattern.Invoke(instance, new object[] { pattern });
}
// Forwards to the internal ScanCharClass(bool) and wraps the returned object.
internal RegexCharClass ScanCharClass(bool caseInsensitive)
{
return new RegexCharClass(RegexParser_ScanCharClass.Invoke(instance, new object[] { caseInsensitive }));
}
}
// Reflection wrapper around the internal System.Text.RegularExpressions.RegexCharClass.
internal class RegexCharClass
{
static readonly Type RegexCharClassType;
static readonly MethodInfo RegexCharClass_RangeCount;
static readonly MethodInfo RegexCharClass_GetRangeAt;
// Resolves the internal type and its non-public RangeCount / GetRangeAt methods.
static RegexCharClass()
{
RegexCharClassType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexCharClass_RangeCount = RegexCharClassType.GetMethod("RangeCount", flags, null, new Type[] { }, null);
RegexCharClass_GetRangeAt = RegexCharClassType.GetMethod("GetRangeAt", flags, null, new[] { typeof(Int32) }, null);
}
// The wrapped internal character-class object.
private readonly object instance;
// Wraps regexCharClass; throws if it is null or not exactly the internal RegexCharClass type.
internal RegexCharClass(object regexCharClass)
{
if (regexCharClass == null)
throw new ArgumentNullException("regexCharClass");
if (regexCharClass.GetType() != RegexCharClassType)
throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");
instance = regexCharClass;
}
// Forwards to the internal RangeCount().
internal int RangeCount()
{
return (int)RegexCharClass_RangeCount.Invoke(instance, new object[] { });
}
// Forwards to the internal GetRangeAt(int) and copies the result into a SingleRange.
internal SingleRange GetRangeAt(int i)
{
return new SingleRange(RegexCharClass_GetRangeAt.Invoke(instance, new object[] { i }));
}
}
// Value copy of the internal RegexCharClass+SingleRange: the two endpoint characters.
internal struct SingleRange
{
static readonly Type RegexCharClassSingleRangeType;
static readonly FieldInfo SingleRange_first;
static readonly FieldInfo SingleRange_last;
// Resolves the nested internal type and its non-public _first / _last fields.
static SingleRange()
{
RegexCharClassSingleRangeType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
SingleRange_first = RegexCharClassSingleRangeType.GetField("_first", flags);
SingleRange_last = RegexCharClassSingleRangeType.GetField("_last", flags);
}
// First and last character of the range; ExpandRange treats both as inclusive.
internal char _first;
internal char _last;
// Reads _first and _last out of singleRange; throws if it is null or of another type.
internal SingleRange(object singleRange)
{
if (singleRange == null)
throw new ArgumentNullException("singleRange");
if (singleRange.GetType() != RegexCharClassSingleRangeType)
throw new ArgumentException("not an instance of a SingleRange object", "singleRange");
_first = (char)SingleRange_first.GetValue(singleRange);
_last = (char)SingleRange_last.GetValue(singleRange);
}
}
}
// usage (verbatim string so the backslash reaches the regex parser unchanged):
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"

Seems like a pretty unusual requirement, but since there are only about 96 characters that you can match (unless you include high chars), you might as well just test your regular expression against all of them, and output the matches:
/// <summary>
/// Returns every candidate character that the given regular expression matches,
/// in the order the candidates are listed.
/// </summary>
/// <param name="input_re">Regular expression tested against each single candidate character.</param>
/// <returns>The matching candidate characters concatenated into one string.</returns>
public static string expando(string input_re) {
    // add more chars in candidates as needed, such as ,.?/|=+_-éñ etc.
    string candidates = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    Regex matcher = new Regex(input_re);
    string matched = "";
    foreach (char candidate in candidates) {
        if (!matcher.IsMatch(candidate.ToString())) {
            continue;
        }
        matched += candidate;
    }
    return matched;
}
By using an actual regex to determine your character class, you can expand whatever regular expression you want, [^A-B]|[0123a-cg-h], for example.

Something like this?
var input = "a-fA-F0-9!";
// Each match is either a three-character range "x-y" or a single literal character.
var matches = Regex.Matches(input, @".-.|.");
var list = new StringBuilder();
foreach (Match m in matches)
{
    var value = m.Value;
    if (value.Length == 1)
    {
        list.Append(value);
    }
    else
    {
        if (value[2] < value[0]) throw new ArgumentException("invalid format"); // or switch, if you want.
        // Count with an int so a range ending at '\uffff' cannot wrap around and loop forever.
        for (int c = value[0]; c <= value[2]; c++)
            list.Append((char)c);
    }
}
Console.WriteLine(list);
Output:
abcdefABCDEF0123456789!
The moral, of course, is to solve your regex problems with more regex!
Here's a version with support for escape characters. It all depends how robust you want it to be... for example, I don't do anything special here to handle surrogates, so that probably won't work. Also, if you're trying to match the performance of a current regex engine exactly you'll need to know exactly what all the parameters are, which would be a fairly big job.
// Expands a character-set specification that may contain escapes (\x41, \0x46, \u41,
// decimal \65, \n, \t, \r) and prints the expanded characters.
void Main()
{
    //these are all equivalent:
    var input = @"\x41-\0x46\u41";
    var input2 = @"\65-\70\65";
    var input3 = "A-FA";
    // match hex as \0x123 or \x123 or \u123, or decimal \412, or the escapes \n\t\r, or any character
    var charRegex = @"(\\(0?x|u)[0-9a-fA-F]+|\\[0-9]+|\\[ntr]|.)";
    // A match is either "item-item" (a range) or a single item.
    var matches = Regex.Matches(input, charRegex + "-" + charRegex + "|" + charRegex);
    var list = new StringBuilder();
    foreach (Match m in matches)
    {
        var dashIndex = m.Value.IndexOf('-', 1); //don't look at 0 (in case it's a dash)
        if (dashIndex > 0) // this means we have two items: a range
        {
            var charLeft = Decode(m.Value.Substring(0, dashIndex));
            var charRight = Decode(m.Value.Substring(dashIndex + 1));
            if (charRight < charLeft) throw new ArgumentException("invalid format (left bigger than right)"); // or switch, if you want.
            // Count with an int so a range ending at '\uffff' cannot wrap around and loop forever.
            for (int c = charLeft; c <= charRight; c++)
                list.Append((char)c);
        }
        else // just one item
        {
            list.Append(Decode(m.Value));
        }
    }
    Console.WriteLine(list);
}
// Converts one token to the character it denotes. A one-character token is returned
// as is; otherwise the token starts with a backslash and is one of: \n, \t, \r,
// hexadecimal (\x.., \u.., \0x..) or a decimal character code (\65).
char Decode(string s)
{
    if (s.Length == 1)
        return s[0];

    // here, s[0] == '\', because of the regex
    char kind = s[1];

    if (s.Length == 2)
    {
        // incomplete; add more as wished
        if (kind == 'n') return '\n';
        if (kind == 't') return '\t';
        if (kind == 'r') return '\r';
    }

    bool shortHexPrefix = kind == 'u' || kind == 'x';
    if (shortHexPrefix)
        return (char)Convert.ToUInt16(s.Substring(2), 16);

    bool longHexPrefix = s.Length > 2 && kind == '0' && s[2] == 'x';
    if (longHexPrefix)
        return (char)Convert.ToUInt16(s.Substring(3), 16);

    // will fail from here if invalid escape (e.g. \g)
    return (char)Convert.ToUInt16(s.Substring(1));
}

// Universe used for negated sets: every UTF-16 code unit that is not a control
// character. Materialized once so the 65,536-element filter is not re-run per call.
private static readonly IEnumerable<char> CharacterSet = Enumerable.Range(0, char.MaxValue + 1).Select(Convert.ToChar).Where(c => !char.IsControl(c)).ToArray();

/// <summary>
/// Expands a regex-style character set such as "a-f0-9", "[a-f0-9]" or "[^a-f]" into
/// the string of characters it denotes.
/// </summary>
/// <param name="set">
/// The set specification. One pair of surrounding brackets is optional; a leading '^'
/// (after the optional bracket) negates the set.
/// </param>
/// <returns>
/// The expanded characters. For a negated set, every non-control character that is not
/// in the set. An empty specification ("" or "[]") yields an empty string.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is null.</exception>
public static string ExpandCharacterSet(string set)
{
    if (set == null)
        throw new ArgumentNullException("set");
    if (set.Length == 0)
        return "";

    if (set[0] == '[' && set[set.Length - 1] == ']')
        set = set.Substring(1, set.Length - 2);

    // The bracket strip can leave an empty string ("[]"), so check the length before
    // looking at the first character.
    bool invertSet = set.Length > 0 && set[0] == '^';
    if (invertSet)
        set = set.Substring(1);

    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        // Search from start + 1 so that a dash in first position is treated as a literal.
        int dash = set.IndexOf('-', start + 1);
        if (dash <= 0 || dash >= set.Length - 1)
            break;

        // Literal characters that precede the range's first endpoint.
        sb.Append(set, start, dash - start - 1);

        char first = set[dash - 1];
        char last = set[dash + 1];
        // Count with an int: a char counter wraps after '\uffff' and would never terminate.
        for (int code = first; code <= last; ++code)
            sb.Append((char)code);

        start = dash + 2;
    }
    sb.Append(set, start, set.Length - start);

    if (!invertSet)
        return sb.ToString();

    var remaining = new HashSet<char>(CharacterSet);
    remaining.ExceptWith(sb.ToString());
    return new string(remaining.ToArray());
}

Related

C# word permutations with special characters

I'm trying to create permutations of the word "password". I have managed to generate all of them with the code below, but currently it only handles upper and lower case. I also need to include permutations that contain special characters, for example "p#ssword", where "a" is replaced with "#" and "o" with "0".
Any idea how I can expand my implementation?
/// <summary>
/// Writes every upper/lower-case variant of <paramref name="pwd"/> to the console, one per line.
/// </summary>
/// <remarks>
/// Variant number <c>mask</c> (0 .. 2^length - 1) upper-cases the character at position
/// <c>i</c> exactly when bit <c>i</c> of <c>mask</c> is set. Casing is applied by position,
/// so repeated letters (the "ss" in "password") are handled independently; replacing the
/// first occurrence of a letter instead would produce duplicates and miss variants.
/// Characters without a distinct upper-case form still take part in the enumeration, so
/// such inputs produce repeated lines.
/// </remarks>
/// <param name="pwd">The word to permute; it is lower-cased first.</param>
/// <exception cref="ArgumentNullException"><paramref name="pwd"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="pwd"/> is longer than 30 characters.</exception>
public static void WritePermutations(string pwd)
{
    if (pwd == null)
        throw new ArgumentNullException("pwd");
    // 1 << length must stay a positive int.
    if (pwd.Length > 30)
        throw new ArgumentException("At most 30 characters are supported.", "pwd");

    pwd = pwd.ToLower();
    int variantCount = 1 << pwd.Length;
    var buffer = new char[pwd.Length];

    for (int mask = 0; mask < variantCount; mask++)
    {
        for (int i = 0; i < pwd.Length; i++)
        {
            bool upper = (mask & (1 << i)) != 0;
            buffer[i] = upper ? char.ToUpper(pwd[i]) : pwd[i];
        }
        Console.WriteLine(new string(buffer));
    }
}
/// <summary>
/// Replaces the first occurrence of <paramref name="Find"/> in <paramref name="Source"/>
/// with <paramref name="Replace"/>.
/// </summary>
/// <returns>
/// The modified string, or <paramref name="Source"/> unchanged when
/// <paramref name="Find"/> does not occur in it.
/// </returns>
public static string ReplaceFirstOccurrence(string Source, string Find, string Replace)
{
    // Ordinal search, so the matched span is exactly Find.Length characters long.
    int Place = Source.IndexOf(Find, StringComparison.Ordinal);
    if (Place < 0)
    {
        // Nothing to replace; Remove(-1, ...) would throw.
        return Source;
    }
    return Source.Remove(Place, Find.Length).Insert(Place, Replace);
}
How about starting off with these declarations and functions:
// Groups of characters that may stand in for one another (in addition to case changes).
List<HashSet<char>> _charCombinations = new List<HashSet<char>> {
    new HashSet<char> {'a','#'},
    new HashSet<char> {'o', '0'},
};

// Returns every character that may replace c: the members of each substitution group
// containing c, the upper- and lower-case forms of c when it is a letter, and c itself.
HashSet<char> GetAlternatives(char c)
{
    var result = new HashSet<char>();
    foreach (var group in _charCombinations)
    {
        if (group.Contains(c))
            result.UnionWith(group);
    }
    if (char.IsLetter(c))
    {
        result.Add(char.ToUpper(c));
        result.Add(char.ToLower(c));
    }
    // Add further char.Is*-based rules here if needed.
    result.Add(c);
    return result;
}
// Lazily yields every string obtained by replacing each character of s, from the
// 1-based position start onwards, with each of its alternatives.
IEnumerable<string> GetTransformations(string s, int start)
{
    char current = s[start - 1];
    bool isLastPosition = start == s.Length;
    foreach (var alternative in GetAlternatives(current))
    {
        if (isLastPosition)
        {
            yield return String.Empty + alternative;
            continue;
        }
        // Prefix this alternative to every transformation of the remaining characters.
        foreach (var tail in GetTransformations(s, start + 1))
            yield return alternative + tail;
    }
}
You can then use them like this:
// Collect every transformation, then order them by ordinal comparison.
var result = new List<string>(GetTransformations("password", 1));
result.Sort((a, b) => string.CompareOrdinal(a, b));
This produces 576 strings (too long to list here), which is exactly what you'd expect for an 8-letter word where 6 of the chars have 2 possibilities, and the other 2 have 3 possibilities, i.e. 2x2x2x2x2x2x3x3

Parse a custom file with a list of commands

There is a list of commands in a file that look like this:
command1 argument1 argument2
command2 argument3 argument4
And the result should look like
//Dictionary<command name,list of arguments>
Dictionary<string,List<string>>
Of course, there can be any amount of arguments, not just 2 of them. Parsing it is a piece of cake. But the thing is, there can be multi-line arguments.
command {some
amount
of random text
} {and the second
argument} and_here_goes_argument_3
This is where it gets tricky. I've created a while loop with if conditions to parse this file, but it took me like 200+ lines of code and was totally unreadable. I bet there is a better way to do this.
Of course, I am not asking you to write the code for me. All I need is a basic approach.
As for the language- it can be either C# or C++.
Showing how much pain doing it with regexes would be:
// Sample input: two single-line commands and one command with multi-line {...} arguments.
string text = @"command1 argument1 argument2
command2 argument3 argument4
command {some
amount
of random text
} {and the second
argument} and_here_goes_argument_3";
// One match per command; every argument is recorded as a capture of the "argument" group.
var rx = new Regex(@"^(?<command>(?:(?!\r|$)[^ ])*) +(?:(?<argument>{[^}]*}|(?!\r?$|{)(?:(?!\r|$)[^ ])+)(?: +\r?$?|\r?$))*", RegexOptions.Multiline | RegexOptions.ExplicitCapture);
var matches = rx.Matches(text);
foreach (Match match in matches)
{
    Console.WriteLine($"Command: {match.Groups["command"].Value}");
    foreach (Capture capture in match.Groups["argument"].Captures)
    {
        Console.WriteLine($" - arg: [{capture.Value}]");
    }
    Console.WriteLine();
}
The problem is that this regex is both unreadable and brittle. Try adding an x just after the argument}, as in argument}x. Handling malformed text is very difficult.
The only interesting part is that I use the RegexOptions.Multiline to handle multiple lines of text, and that $ matches the \n but not the \r that I handle manually.
Paradoxically a small grammar using a library could be the "simplest" solution...
Ok now some "real" code:
private static readonly string[] commandDelimiters = new[] { " ", "\r", "\n" };
// We don't want the { to be used inside arguments that aren't in the form {...}
// Note that at this time there is no way to "escape" the }
private static readonly string[] argumentDelimiters = new[] { " ", "\r", "\n", "{" };

/// <summary>
/// Lazily parses a list of commands, one per logical line, each followed by any number
/// of arguments. An argument is either a bare word or a <c>{...}</c> group, which may
/// span several lines and is returned without its braces.
/// </summary>
/// <param name="str">The full text to parse.</param>
/// <returns>One (command name, arguments) tuple per command, in input order.</returns>
/// <exception cref="FormatException">
/// Thrown while enumerating when a line has no command, a <c>{</c> is never closed, a
/// <c>}</c> is not followed by a space or end of line, or a <c>{</c> appears inside a
/// bare word. The message gives the zero-based line and column.
/// </exception>
public static IEnumerable<Tuple<string, string[]>> ParseCommands(string str)
{
    int ix = 0;
    int line = 0;
    int ixStartLine = 0; // index in str of the first character of the current line
    var args = new List<string>();
    while (ix < str.Length)
    {
        string command = ParseWord(str, ref ix, commandDelimiters);
        if (command.Length == 0)
        {
            throw new FormatException($"No command, at line {line}, col {ix - ixStartLine}");
        }
        while (true)
        {
            SkipSpaces(str, ref ix);
            if (IsEOL(str, true, ref ix))
            {
                line++;
                ixStartLine = ix;
                break;
            }
            if (str[ix] == '{')
            {
                int ix2 = str.IndexOf('}', ix + 1);
                if (ix2 == -1)
                {
                    throw new FormatException($"Unclosed {{ at line {line}, col {ix - ixStartLine}");
                }
                // Skipping the {
                ix++;
                // Skipping the }, because we don't do ix2 - ix -1
                string arg = str.Substring(ix, ix2 - ix);
                // We count the new lines "inside" the { }
                for (int i = 0; i < arg.Length; )
                {
                    if (IsEOL(arg, true, ref i))
                    {
                        line++;
                        // i now indexes the first character of the next line inside arg,
                        // and arg[0] is str[ix], so that line starts at str[ix + i].
                        ixStartLine = ix + i;
                    }
                    else
                    {
                        i++;
                    }
                }
                // Skipping the }
                ix = ix2 + 1;
                // If there is no space or eol after the } then error
                if (ix < str.Length && str[ix] != ' ' && !IsEOL(str, false, ref ix))
                {
                    throw new FormatException($"Unexpected character at line {line}, col {ix - ixStartLine}");
                }
                args.Add(arg);
            }
            else
            {
                // Bare words also stop at '{' so that "word{" is detected as an error below.
                string arg = ParseWord(str, ref ix, argumentDelimiters);
                // If the terminator is {, then error.
                if (ix < str.Length && str[ix] == '{')
                {
                    throw new FormatException($"Unexpected character at line {line}, col {ix - ixStartLine}");
                }
                args.Add(arg);
            }
        }
        // Snapshot the arguments so the shared list can be reused for the next command.
        var args2 = args.ToArray();
        args.Clear();
        yield return Tuple.Create(command, args2);
    }
}
// Reads characters from str starting at ix until one of the terminators begins at the
// current position or the string ends. Returns the characters read and leaves ix on
// the terminator (it is not consumed).
public static string ParseWord(string str, ref int ix, string[] terminators)
{
    int end = ix;
    for (; end < str.Length; end++)
    {
        int probe = end; // copy for the lambda: the loop variable changes between iterations
        bool atTerminator = terminators.Any(t => string.CompareOrdinal(str, probe, t, 0, t.Length) == 0);
        if (atTerminator)
        {
            break;
        }
    }
    string word = str.Substring(ix, end - ix);
    ix = end;
    return word;
}
// Advances ix past any run of ' ' characters starting at ix.
// Returns true when at least one space was skipped.
public static bool SkipSpaces(string str, ref int ix)
{
    int begin = ix;
    for (; ix < str.Length && str[ix] == ' '; ix++)
    {
    }
    return ix != begin;
}
// \r\n, \r, \n, end-of-string == true
// Reports whether position ix in str is at a line terminator or at the end of the
// string. When advance is true and a terminator is present, ix is moved just past it:
// two characters for "\r\n", one for a lone "\r" or "\n". ix is never moved at
// end-of-string or when the result is false.
public static bool IsEOL(string str, bool advance, ref int ix)
{
    if (ix == str.Length)
    {
        return true;
    }
    if (str[ix] == '\r')
    {
        if (advance)
        {
            if (ix + 1 < str.Length && str[ix + 1] == '\n')
            {
                ix += 2;
            }
            else
            {
                // Lone '\r': consume only that character.
                ix++;
            }
        }
        return true;
    }
    if (str[ix] == '\n')
    {
        if (advance)
        {
            ix++;
        }
        return true;
    }
    return false;
}
It is quite long, but I do think it is quite clear to read. The errors should be very exact (line and col given). Note that there is no possible escaping for the }. Doing it in an elegant way is complex.
Use it like:
var res = ParseCommands(text).ToArray();

Find all pattern indexes in string in C#

How can I find all indexes of a pattern in a string using c#?
For example I want to find all ## pattern indexes in a string like this 45##78$$#56$$JK01UU
string pattern = "##";
string sentence = "45##78$$#56$$J##K01UU";
IList<int> indeces = new List<int>();
// The pattern is literal text, so escape it: otherwise a pattern such as "$$" would be
// read as regex anchors rather than two dollar signs.
foreach (Match match in Regex.Matches(sentence, Regex.Escape(pattern)))
{
    indeces.Add(match.Index);
}
indeces will have 2, 14
Edited the code to make it a cleaner function.
/// <summary>
/// Lazily yields the start index of every non-overlapping occurrence of
/// <paramref name="pattern"/> in <paramref name="str"/>, scanning left to right.
/// </summary>
/// <param name="str">The text to search.</param>
/// <param name="pattern">The literal text to look for (ordinal comparison).</param>
/// <returns>The zero-based match positions; empty when the pattern is empty or absent.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown on enumeration when <paramref name="str"/> or <paramref name="pattern"/> is null.
/// </exception>
public IEnumerable<int> FindAllIndexes(string str, string pattern)
{
    if (str == null)
        throw new ArgumentNullException("str");
    if (pattern == null)
        throw new ArgumentNullException("pattern");
    // An empty pattern is found at the same position forever; yield nothing instead.
    if (pattern.Length == 0)
        yield break;

    int index = str.IndexOf(pattern, 0, StringComparison.Ordinal);
    while (index != -1)
    {
        yield return index;
        // Resume after the whole match so that matches do not overlap.
        index = str.IndexOf(pattern, index + pattern.Length, StringComparison.Ordinal);
    }
}
string str = "45##78$$#56$$JK01UU";
string pattern = "##";
var indexes = FindAllIndexes(str, pattern);
You can get all the indices of a pattern in a string by using a regex search like this.
string input = "45##78$$#56$$JK01UU", pattern = Regex.Escape("##");
Regex rx = new Regex(pattern);
var indices = new List<int>();
// Search the declared input string; MatchCollection exposes Count, not Length.
var matches = rx.Matches(input);
for (int i = 0; i < matches.Count; i++)
{
    indices.Add(matches[i].Index);
}
Another one that tries to be efficient:
/// <summary>
/// Lazily yields every index at which <paramref name="search"/> occurs in
/// <paramref name="input"/>, including overlapping occurrences.
/// </summary>
/// <param name="input">The text to search.</param>
/// <param name="search">The literal text to look for (ordinal comparison).</param>
/// <returns>The zero-based match positions in ascending order; empty for an empty search text.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown on enumeration when <paramref name="input"/> or <paramref name="search"/> is null.
/// </exception>
public IEnumerable<int> FindPatternIndexes(string input, string search)
{
    if (input == null)
        throw new ArgumentNullException("input");
    if (search == null)
        throw new ArgumentNullException("search");
    if (search.Length == 0)
        yield break;

    // Compare in place at each offset instead of copying the remaining text on every step.
    int lastStart = input.Length - search.Length;
    for (var i = 0; i <= lastStart; i++)
    {
        if (string.CompareOrdinal(input, i, search, 0, search.Length) == 0)
            yield return i;
    }
}
Tested. Worked. But somewhat dumb.
// Reports the index of the first '#' of every adjacent "##" pair.
string foo = "45##78$$#56$$JK01UU";
char[] fooChar = foo.ToCharArray();
int i = 0;
bool register = false; // true when the previous character was '#'
while (i < fooChar.Length)
{
    bool isHash = fooChar[i] == '#';
    if (isHash && register)
    {
        MessageBox.Show("Index: " + (i-1));
    }
    // Remember whether this character was a '#' for the next iteration.
    register = isHash;
    i++;
}

Splitting CamelCase

This is all asp.net c#.
I have an enum
/// <summary>
/// Kinds of selection control. Per the surrounding question, the numeric value of a
/// member is what is stored in the database and bound to the grid column.
/// </summary>
public enum ControlSelectionType
{
NotApplicable = 1,
SingleSelectRadioButtons = 2,
SingleSelectDropDownList = 3,
MultiSelectCheckBox = 4,
MultiSelectListBox = 5
}
The numerical value of this is stored in my database. I display this value in a datagrid.
<asp:boundcolumn datafield="ControlSelectionTypeId" headertext="Control Type"></asp:boundcolumn>
The ID means nothing to a user so I have changed the boundcolumn to a template column with the following.
<asp:TemplateColumn>
<ItemTemplate>
<%# Enum.Parse(typeof(ControlSelectionType), DataBinder.Eval(Container.DataItem, "ControlSelectionTypeId").ToString()).ToString()%>
</ItemTemplate>
</asp:TemplateColumn>
This is a lot better... However, it would be great if there was a simple function I can put around the Enum to split it by Camel case so that the words wrap nicely in the datagrid.
Note: I am fully aware that there are better ways of doing all this. This screen is purely used internally and I just want a quick hack in place to display it a little better.
I used:
/// <summary>
/// Inserts a space before every upper-case ASCII letter and trims the result,
/// e.g. "NotApplicable" becomes "Not Applicable".
/// </summary>
public static string SplitCamelCase(string input)
{
    const System.Text.RegularExpressions.RegexOptions options = System.Text.RegularExpressions.RegexOptions.Compiled;
    string spaced = System.Text.RegularExpressions.Regex.Replace(input, "([A-Z])", " $1", options);
    return spaced.Trim();
}
Taken from http://weblogs.asp.net/jgalloway/archive/2005/09/27/426087.aspx
vb.net:
''' <summary>
''' Inserts a space before every upper-case ASCII letter and trims the result.
''' </summary>
Public Shared Function SplitCamelCase(ByVal input As String) As String
    Dim spaced As String = System.Text.RegularExpressions.Regex.Replace(input, "([A-Z])", " $1", System.Text.RegularExpressions.RegexOptions.Compiled)
    Return spaced.Trim()
End Function
Here is a dotnet Fiddle for online execution of the c# code.
Indeed a regex/replace is the way to go as described in the other answer, however this might also be of use to you if you wanted to go a different direction
using System.ComponentModel;
using System.Reflection;
...
/// <summary>
/// Returns the text of the <see cref="DescriptionAttribute"/> applied to an enum member,
/// or the member's name when it has no such attribute.
/// </summary>
/// <param name="value">The enum value to describe.</param>
/// <returns>
/// The description text; otherwise <c>value.ToString()</c>. That is also the result for
/// values that do not correspond to a declared member (undefined numbers or flag
/// combinations), for which no field exists.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
public static string GetDescription(System.Enum value)
{
    if (value == null)
        throw new ArgumentNullException("value");

    string name = value.ToString();
    // GetField returns null when the value has no matching named member.
    FieldInfo fi = value.GetType().GetField(name);
    if (fi == null)
        return name;

    DescriptionAttribute[] attributes = (DescriptionAttribute[])fi.GetCustomAttributes(typeof(DescriptionAttribute), false);
    return attributes.Length > 0 ? attributes[0].Description : name;
}
This will allow you to define your enums as:
/// <summary>
/// Selection-control kinds whose display text is carried by a [Description] attribute
/// on each member; the text need not resemble the member name.
/// </summary>
public enum ControlSelectionType
{
[Description("Not Applicable")]
NotApplicable = 1,
[Description("Single Select Radio Buttons")]
SingleSelectRadioButtons = 2,
[Description("Completely Different Display Text")]
SingleSelectDropDownList = 3,
}
Taken from
http://www.codeguru.com/forum/archive/index.php/t-412868.html
This regex (^[a-z]+|[A-Z]+(?![a-z])|[A-Z][a-z]+) can be used to extract all words from the camelCase or PascalCase name. It also works with abbreviations anywhere inside the name.
MyHTTPServer will contain exactly 3 matches: My, HTTP, Server
myNewXMLFile will contain 4 matches: my, New, XML, File
You could then join them into a single string using string.Join.
string name = "myNewUIControl";
// Alternatives: a leading lower-case run, an upper-case run not followed by a
// lower-case letter (acronym), or a capitalized word.
var wordPattern = new Regex("(^[a-z]+|[A-Z]+(?![a-z])|[A-Z][a-z]+)");
var found = wordPattern.Matches(name).OfType<Match>();
string[] words = found.Select(m => m.Value).ToArray();
string result = string.Join(" ", words);
As @DanielB noted in the comments, that regex won't work for numbers (or with underscores), so here is an improved version that supports any identifier with words, acronyms, numbers and underscores (a slightly modified version of @JoeJohnston's), see online demo (fiddle):
([A-Z]+(?![a-z])|[A-Z][a-z]+|[0-9]+|[a-z]+)
Extreme example: __snake_case12_camelCase_TLA1ABC → snake, case, 12, camel, Case, TLA, 1, ABC
Tillito's answer does not handle strings already containing spaces well, or Acronyms. This fixes it:
/// <summary>
/// Inserts a space before every upper-case ASCII letter that directly follows a
/// lower-case ASCII letter; existing spaces and runs of capitals are left alone.
/// </summary>
public static string SplitCamelCase(string input)
{
    const string lowerThenUpper = "(?<=[a-z])([A-Z])";
    string spaced = Regex.Replace(input, lowerThenUpper, " $1", RegexOptions.Compiled);
    return spaced;
}
If C# 3.0 is an option you can use the following one-liner to do the job:
// string.Join returns "" when nothing matches, whereas Aggregate throws on an empty sequence.
string.Join(" ", Regex.Matches(YOUR_ENUM_VALUE_NAME, "[A-Z][a-z]+").OfType<Match>().Select(match => match.Value).ToArray());
Here's an extension method that handles numbers and multiple uppercase characters sanely, and also allows for upper-casing specific acronyms in the final string:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Globalization;
using System.Text.RegularExpressions;
using System.Web.Configuration;
namespace System
{
    /// <summary>
    /// Extension methods for the string data type
    /// </summary>
    public static class ConventionBasedFormattingExtensions
    {
        /// <summary>
        /// Turn CamelCaseText into Camel Case Text.
        /// </summary>
        /// <param name="input">The identifier-style text to split.</param>
        /// <returns>
        /// The split text with its first letter capitalized; null when
        /// <paramref name="input"/> is null, and "" when it is empty or whitespace.
        /// </returns>
        /// <remarks>Use AppSettings["SplitCamelCase_AllCapsWords"] to specify a comma-delimited list of words that should be ALL CAPS after split</remarks>
        /// <example>
        /// wordWordIDWord1WordWORDWord32Word2
        /// Word Word ID Word 1 Word WORD Word 32 Word 2
        ///
        /// wordWordIDWord1WordWORDWord32WordID2ID
        /// Word Word ID Word 1 Word WORD Word 32 Word ID 2 ID
        ///
        /// WordWordIDWord1WordWORDWord32Word2Aa
        /// Word Word ID Word 1 Word WORD Word 32 Word 2 Aa
        ///
        /// wordWordIDWord1WordWORDWord32Word2A
        /// Word Word ID Word 1 Word WORD Word 32 Word 2 A
        /// </example>
        public static string SplitCamelCase(this string input)
        {
            if (input == null) return null;
            if (string.IsNullOrWhiteSpace(input)) return "";

            var separated = SplitCamelCaseRegex.Replace(input, @" $1").Trim();

            //Set ALL CAPS words
            foreach (var word in _SplitCamelCase_AllCapsWords)
            {
                var upper = word.ToUpperInvariant();
                // Use an evaluator so a '$' in a configured word is not read as a substitution.
                separated = SplitCamelCase_AllCapsWords_Regexes[word].Replace(separated, match => upper);
            }

            //Capitalize first letter
            var firstChar = separated.First(); //NullOrWhiteSpace handled earlier
            if (char.IsLower(firstChar))
                separated = char.ToUpper(firstChar) + separated.Substring(1);

            return separated;
        }

        // Matches the first character of each new word; the pattern's outer group is $1.
        private static readonly Regex SplitCamelCaseRegex = new Regex(@"
            (
                (?<=[a-z])[A-Z0-9] (?# lower-to-other boundaries )
                |
                (?<=[0-9])[a-zA-Z] (?# number-to-other boundaries )
                |
                (?<=[A-Z])[0-9] (?# cap-to-number boundaries; handles a specific issue with the next condition )
                |
                (?<=[A-Z])[A-Z](?=[a-z]) (?# handles longer strings of caps like ID or CMS by splitting off the last capital )
            )"
            , RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace
        );

        // Lower-cased, trimmed, de-duplicated words from configuration that must come out ALL CAPS.
        private static readonly string[] _SplitCamelCase_AllCapsWords =
            (WebConfigurationManager.AppSettings["SplitCamelCase_AllCapsWords"] ?? "")
                .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(a => a.ToLowerInvariant().Trim())
                .Where(a => a.Length > 0)
                .Distinct()
                .ToArray()
                ;

        // One whole-word, case-insensitive regex per configured word. Built in the static
        // initializer (after the word list above, which it depends on), so no locking is
        // needed. Words are escaped so regex metacharacters in configuration are literal.
        private static readonly Dictionary<string, Regex> SplitCamelCase_AllCapsWords_Regexes =
            _SplitCamelCase_AllCapsWords.ToDictionary(
                word => word,
                word => new Regex(@"\b" + Regex.Escape(word) + @"\b", RegexOptions.Compiled | RegexOptions.IgnoreCase));
    }
}
You can use C# extension methods
/// <summary>
/// Inserts a space before every upper-case character except one in first position.
/// An empty string is returned as is.
/// </summary>
public static string SpacesFromCamel(this string value)
{
    if (value.Length == 0)
    {
        return value;
    }

    var spaced = new List<char>();
    foreach (char letter in value.ToCharArray())
    {
        bool startsNewWord = spaced.Count > 0 && char.IsUpper(letter);
        if (startsNewWord)
        {
            spaced.Add(' ');
        }
        spaced.Add(letter);
    }
    return new string(spaced.ToArray());
}
Then you can use it like
var result = "TestString".SpacesFromCamel();
Result will be
Test String
Using LINQ:
// Emit a space before every upper-case character except the first character.
var chars = ControlSelectionType.NotApplicable.ToString()
    .SelectMany((x, i) => (i > 0 && char.IsUpper(x)) ? new char[] { ' ', x } : new char[] { x });
Console.WriteLine(new string(chars.ToArray()));
I also have an enum which I had to separate. In my case this method solved the problem-
// Inserts a space before every upper-case character except one in first position.
string SeparateCamelCase(string str)
{
    // Walk backwards so that inserting a space never shifts the positions still to visit.
    for (int pos = str.Length - 1; pos >= 1; pos--)
    {
        if (!char.IsUpper(str[pos]))
        {
            continue;
        }
        str = str.Insert(pos, " ");
    }
    return str;
}
/// <summary>
/// Kinds of selection control, each with an explicit numeric value (1 to 5).
/// </summary>
public enum ControlSelectionType
{
NotApplicable = 1,
SingleSelectRadioButtons = 2,
SingleSelectDropDownList = 3,
MultiSelectCheckBox = 4,
MultiSelectListBox = 5
}
/// <summary>
/// A display name paired with an arbitrary value; EnumToList produces one per enum member.
/// </summary>
public class NameValue
{
/// <summary>Text to show for the item.</summary>
public string Name { get; set; }
/// <summary>The underlying value (EnumToList stores the enum member here).</summary>
public object Value { get; set; }
}
/// <summary>
/// Builds a list with one <see cref="NameValue"/> per member of the enum <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">An enum type.</typeparam>
/// <param name="camelcase">
/// When true, each member name is split into words with <c>CamelCaseFriendly</c>;
/// otherwise the name is used unchanged.
/// </param>
/// <returns>The list of name/value pairs; empty (never null) for an enum without members.</returns>
public static List<NameValue> EnumToList<T>(bool camelcase)
{
    // Enum.GetValues returns an array whose runtime type is T[], so a direct cast is enough.
    var values = (T[])Enum.GetValues(typeof(T));
    string[] names = Enum.GetNames(typeof(T));

    var lst = new List<NameValue>(values.Length);
    for (int i = 0; i < values.Length; i++)
    {
        string name = camelcase ? names[i].CamelCaseFriendly() : names[i];
        lst.Add(new NameValue { Name = name, Value = values[i] });
    }
    return lst;
}
/// <summary>
/// Inserts a space before an upper-case ASCII letter that follows a lower-case letter,
/// or that follows any character and is itself followed by a lower-case letter
/// (so "MyHTTPServer" becomes "My HTTP Server").
/// </summary>
public static string CamelCaseFriendly(this string pascalCaseString)
{
    const string wordStart = "(?<=[a-z])(?<x>[A-Z])|(?<=.)(?<x>[A-Z])(?=[a-z])";
    var splitter = new Regex(wordStart);
    return splitter.Replace(pascalCaseString, " ${x}");
}
//In your form
// Fills DropDownList1 with the members of ControlSelectionType, showing word-split names.
protected void Button1_Click1(object sender, EventArgs e)
{
    var items = GeneralClass.EnumToList<ControlSelectionType>(true);
    DropDownList1.DataSource = items;
    DropDownList1.DataTextField = "Name";
    DropDownList1.DataValueField = "Value";
    DropDownList1.DataBind();
}
The solution from Eoin Campbell works well unless you have a web service.
In that case you need to do the following, because the Description attribute is not serializable.
/// <summary>
/// Selection-control kinds whose display text is carried by the Value of an
/// [EnumMember] attribute on each member.
/// </summary>
[DataContract]
public enum ControlSelectionType
{
[EnumMember(Value = "Not Applicable")]
NotApplicable = 1,
[EnumMember(Value = "Single Select Radio Buttons")]
SingleSelectRadioButtons = 2,
[EnumMember(Value = "Completely Different Display Text")]
SingleSelectDropDownList = 3,
}
/// <summary>
/// Returns the <see cref="EnumMemberAttribute.Value"/> of an enum member, or the
/// member's name when it has no such attribute or the attribute sets no value.
/// </summary>
/// <param name="value">The enum value to describe.</param>
/// <returns>
/// The attribute text; otherwise <c>value.ToString()</c>, which is also the result for
/// values that do not correspond to a declared member.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
public static string GetDescriptionFromEnumValue(Enum value)
{
    if (value == null)
        throw new ArgumentNullException("value");

    string name = value.ToString();
    // GetField returns null for undefined values and flag combinations.
    var field = value.GetType().GetField(name);
    if (field == null)
        return name;

    EnumMemberAttribute attribute = field
        .GetCustomAttributes(typeof(EnumMemberAttribute), false)
        .SingleOrDefault() as EnumMemberAttribute;
    // [EnumMember] without Value leaves Value null; fall back to the name in that case.
    return attribute == null || attribute.Value == null ? name : attribute.Value;
}
And if you don't fancy using regex - try this:
/// <summary>
/// Splits camel-case text into words without using a regex, keeping runs of capitals
/// together: "AAACamel" becomes "AAA Camel" and "aasdasdAAA" becomes "aasdasd AAA".
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="splitChar">The character inserted between words.</param>
public static string SeperateByCamelCase(this string text, char splitChar = ' ') {
    var output = new StringBuilder();
    int lastIndex = text.Length - 1;
    for (int i = 0; i <= lastIndex; i++)
    {
        char c = text[i];
        // Only an upper-case character after the first position can start a new word.
        if (i > 0 && char.IsUpper(c))
        {
            bool afterLower = char.IsLower(text[i - 1]);
            // Followed by a non-upper character: start of a word such as the C in "AAACamel".
            bool beforeNonUpper = i < lastIndex && !char.IsUpper(text[i + 1]);
            // afterLower also covers the start of an acronym ("abcdHTTP") and a trailing
            // capital ("abcdA").
            if (beforeNonUpper || afterLower)
            {
                output.Append(splitChar);
            }
        }
        output.Append(c);
    }
    return output.ToString();
}
It passes these tests. It doesn't handle numbers, but I didn't need it to.
// Checks SeperateByCamelCase against a table of inputs and their expected outputs.
[TestMethod()]
public void ToCamelCaseTest()
{
    var testData = new string[] { "AAACamel", "AAA", "SplitThisByCamel", "AnA", "doesnothing", "a", "A", "aasdasdAAA" };
    var expectedData = new string[] { "AAA Camel", "AAA", "Split This By Camel", "An A", "doesnothing", "a", "A", "aasdasd AAA" };
    for (int i = 0; i < testData.Length; i++)
    {
        var actual = testData[i].SeperateByCamelCase();
        var expected = expectedData[i];
        // Assert.AreEqual takes (expected, actual); the other order gives misleading failure messages.
        Assert.AreEqual(expected, actual, "input: " + testData[i]);
    }
}
#JustSayNoToRegex
Takes a C# identifier, with underscores and numbers, and converts it to a space-separated string.
public static class StringExtensions
{
/// <summary>
/// Converts an identifier (camelCase, PascalCase, snake_case, with digits) into
/// space-separated words, upper-casing the first character.
/// </summary>
/// <param name="identifier">The identifier to split; may be null.</param>
/// <returns>
/// The space-separated text, or an empty string for a null or empty identifier.
/// Identifiers of one or two characters only get their first character upper-cased.
/// </returns>
public static string SplitOnCase(this string identifier)
{
if (identifier == null || identifier.Length == 0) return string.Empty;
var sb = new StringBuilder();
// One- and two-character identifiers are never split.
if (identifier.Length == 1) sb.Append(char.ToUpperInvariant(identifier[0]));
else if (identifier.Length == 2) sb.Append(char.ToUpperInvariant(identifier[0])).Append(identifier[1]);
else {
// A leading underscore is dropped; any other first character is upper-cased.
if (identifier[0] != '_') sb.Append(char.ToUpperInvariant(identifier[0]));
// The branches below are tested in order; the first one that applies wins.
for (int i = 1; i < identifier.Length; i++) {
var current = identifier[i];
var previous = identifier[i - 1];
// Second and later underscores of a run are ignored.
if (current == '_' && previous == '_') continue;
// The first underscore of a run becomes a space (also at the end of the identifier).
else if (current == '_') {
sb.Append(' ');
}
// Letter right after an underscore: starts a word, so upper-case it.
else if (char.IsLetter(current) && previous == '_') {
sb.Append(char.ToUpperInvariant(current));
}
// Letter-to-digit boundary.
else if (char.IsDigit(current) && char.IsLetter(previous)) {
sb.Append(' ').Append(current);
}
// Digit-to-letter boundary; the letter starts a word, so upper-case it.
else if (char.IsLetter(current) && char.IsDigit(previous)) {
sb.Append(' ').Append(char.ToUpperInvariant(current));
}
// Upper after lower, where the upper is the last character or is followed by another upper.
else if (char.IsUpper(current) && char.IsLower(previous)
&& (i < identifier.Length - 1 && char.IsUpper(identifier[i + 1]) || i == identifier.Length - 1)) {
sb.Append(' ').Append(current);
}
// Upper followed by lower: start of a capitalized word.
else if (char.IsUpper(current) && i < identifier.Length - 1 && char.IsLower(identifier[i + 1])) {
sb.Append(' ').Append(current);
}
else {
sb.Append(current);
}
}
}
return sb.ToString();
}
}
Tests:
// NUnit fixture pinning the behavior of SplitOnCase.
[TestFixture]
static class HelpersTests
{
// Each assertion is (expected output, SplitOnCase applied to an input).
[Test]
public static void Basic()
{
// Leading underscores are dropped and the first letter is capitalized.
Assert.AreEqual("Foo", "foo".SplitOnCase());
Assert.AreEqual("Foo", "_foo".SplitOnCase());
Assert.AreEqual("Foo", "__foo".SplitOnCase());
Assert.AreEqual("Foo", "___foo".SplitOnCase());
// Letter/digit boundaries and underscore runs produce a single space.
Assert.AreEqual("Foo 2", "foo2".SplitOnCase());
Assert.AreEqual("Foo 23", "foo23".SplitOnCase());
Assert.AreEqual("Foo 23 A", "foo23A".SplitOnCase());
Assert.AreEqual("Foo 23 Ab", "foo23Ab".SplitOnCase());
Assert.AreEqual("Foo 23 Ab", "foo23_ab".SplitOnCase());
Assert.AreEqual("Foo 23 Ab", "foo23___ab".SplitOnCase());
Assert.AreEqual("Foo 23", "foo__23".SplitOnCase());
Assert.AreEqual("Foo Bar", "Foo_bar".SplitOnCase());
Assert.AreEqual("Foo Bar", "Foo____bar".SplitOnCase());
// Runs of capitals and case transitions.
Assert.AreEqual("AAA", "AAA".SplitOnCase());
Assert.AreEqual("Foo A Aa", "fooAAa".SplitOnCase());
Assert.AreEqual("Foo AAA", "fooAAA".SplitOnCase());
Assert.AreEqual("Foo Bar", "FooBar".SplitOnCase());
Assert.AreEqual("Mn M", "MnM".SplitOnCase());
// One- and two-character inputs are not split.
Assert.AreEqual("AS", "aS".SplitOnCase());
Assert.AreEqual("As", "as".SplitOnCase());
Assert.AreEqual("A", "a".SplitOnCase());
Assert.AreEqual("_", "_".SplitOnCase());
}
}
Simple version similar to some of the above, but with logic to not auto-insert the separator (which is by default, a space, but can be any char) if there's already one at the current position.
Uses a StringBuilder rather than 'mutating' strings.
/// <summary>
/// Inserts <paramref name="separator"/> before every upper-case character of
/// <paramref name="value"/> unless the preceding character already is the
/// separator (the first character never gets one).
/// </summary>
/// <param name="value">Text to split.</param>
/// <param name="separator">Character placed between words; a space by default.</param>
/// <returns>The text with separators inserted.</returns>
public static string SeparateCamelCase(this string value, char separator = ' ') {
    var output = new StringBuilder();
    for (int i = 0; i < value.Length; i++) {
        char c = value[i];
        // Treat the start of the string as if a separator preceded it.
        char before = i == 0 ? separator : value[i - 1];
        if (before != separator && char.IsUpper(c)) {
            output.Append(separator);
        }
        output.Append(c);
    }
    return output.ToString();
}
Example:
Input : 'ThisIsATest'
Output : 'This Is A Test'
Input : 'This IsATest'
Output : 'This Is A Test' (Note: Still only one space between 'This' and 'Is')
Input : 'ThisIsATest' (with separator '_')
Output : 'This_Is_A_Test'
Try this:
using System;
using System.Linq;
using System.Collections.Generic;
/// <summary>
/// Splits camel-case text into words. A word is one character followed by the
/// run of ASCII lower-case letters that comes after it.
/// </summary>
public class Program
{
    public static void Main()
    {
        Console.WriteLine(
            SeparateByCamelCase("TestString") == "Test String" // True
        );
    }

    /// <summary>Joins the camel-case words of <paramref name="str"/> with single spaces.</summary>
    /// <param name="str">Text to split; null or empty yields an empty string.</param>
    public static string SeparateByCamelCase(string str)
    {
        return String.Join(" ", SplitByCamelCase(str));
    }

    /// <summary>
    /// Returns the camel-case words of <paramref name="str"/> in order.
    /// </summary>
    /// <param name="str">Text to split; null or empty yields no words.</param>
    /// <returns>The words; never null.</returns>
    public static IEnumerable<string> SplitByCamelCase(string str)
    {
        var words = new List<string>();
        if (string.IsNullOrEmpty(str))
            return words;

        // Single left-to-right scan: every character after the first that is not
        // an ASCII lower-case letter starts a new word. This avoids one recursive
        // call and one nested Concat per word, and the repeated string copying.
        int wordStart = 0;
        for (int i = 1; i < str.Length; i++)
        {
            if (!IsLower(str[i]))
            {
                words.Add(str.Substring(wordStart, i - wordStart));
                wordStart = i;
            }
        }
        words.Add(str.Substring(wordStart));
        return words;
    }

    /// <summary>
    /// Returns the first word of <paramref name="str"/>: its first character plus
    /// the ASCII lower-case letters that directly follow it.
    /// </summary>
    public static string Head(string str)
    {
        if (string.IsNullOrEmpty(str))
            return string.Empty;

        int end = 1;
        while (end < str.Length && IsLower(str[end]))
            end++;
        return str.Substring(0, end);
    }

    /// <summary>Returns what remains of <paramref name="str"/> after removing <see cref="Head"/>.</summary>
    public static string Tail(string str)
    {
        if (string.IsNullOrEmpty(str))
            return string.Empty;
        return str.Substring(Head(str).Length);
    }

    /// <summary>True only for the ASCII letters 'a' through 'z'.</summary>
    public static bool IsLower(char ch)
    {
        return ch >= 'a' && ch <= 'z';
    }
}
See sample online

How to determine if a File Matches a File Mask?

I need to decide whether file name fits to file mask. The file mask could contain * or ? characters. Is there any simple solution for this?
// Desired usage: report whether the file name matches a mask that may contain * or ?.
bool bFits = Fits("myfile.txt", "my*.txt");
// Skeleton from the question; the body below is a placeholder, not compilable code.
private bool Fits(string sFileName, string sFileMask)
{
??? anything simple here ???
}
I appreciate finding Joel's answer--saved me some time as well ! I did, however, have to make a few changes to make the method do what most users would expect:
I removed the 'this' keyword preceding the first argument. It does nothing here (though it could be useful if the method is intended to be an extension method, in which case it needs to be public and contained within a static class and itself be a static method).
I made the regular expression case-independent to match standard Windows wildcard behavior (so e.g. "c*.*" and "C*.*" both return the same result).
I added starting and ending anchors to the regular expression, again to match standard Windows wildcard behavior (so e.g. "stuff.txt" would be matched by "stuff*" or "s*" or "s*.*" but not by just "s").
/// <summary>
/// Tests a file name against a wildcard mask by translating the mask into an
/// anchored, case-insensitive regular expression.
/// </summary>
/// <param name="fileName">Name to test.</param>
/// <param name="fileMask">
/// Mask in which '*' matches any run of characters and '?' a single character.
/// Other regex metacharacters in the mask are NOT escaped, so the mask must
/// still form a valid regular expression after translation.
/// </param>
/// <returns>True when the whole file name matches the mask.</returns>
private bool FitsMask(string fileName, string fileMask)
{
    // Order matters: dots are made literal before '*' and '?' introduce new dots.
    string body = fileMask.Replace(".", "[.]");
    body = body.Replace("*", ".*");
    body = body.Replace("?", ".");

    string pattern = "^" + body + "$";
    return Regex.IsMatch(fileName, pattern, RegexOptions.IgnoreCase);
}
2009.11.04 Update: Match one of several masks
For even more flexibility, here is a plug-compatible method built on top of the original. This version lets you pass multiple masks (hence the plural on the second parameter name fileMasks) separated by lines, commas, vertical bars, or spaces. I wanted it so that I could let the user put as many choices as desired in a ListBox and then select all files matching any of them. Note that some controls (like a ListBox) use CR-LF for line breaks while others (e.g. RichTextBox) use just LF--that is why both "\r\n" and "\n" show up in the Split list.
/// <summary>
/// Tests a file name against several masks supplied in one string.
/// </summary>
/// <param name="fileName">Name to test.</param>
/// <param name="fileMasks">
/// Masks separated by CR-LF, LF, comma, vertical bar or space; empty entries are ignored.
/// </param>
/// <returns>True as soon as one mask matches according to FitsMask; otherwise false.</returns>
private bool FitsOneOfMultipleMasks(string fileName, string fileMasks)
{
    string[] separators = { "\r\n", "\n", ",", "|", " " };
    string[] masks = fileMasks.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    foreach (string fileMask in masks)
    {
        if (FitsMask(fileName, fileMask))
        {
            return true;
        }
    }
    return false;
}
2009.11.17 Update: Handle fileMask inputs more gracefully
The earlier version of FitsMask (which I have left in for comparison) does a fair job but since we are treating it as a regular expression it will throw an exception if it is not a valid regular expression when it comes in. The solution is that we actually want any regex metacharacters in the input fileMask to be considered literals, not metacharacters. But we still need to treat period, asterisk, and question mark specially. So this improved version of FitsMask safely moves these three characters out of the way, transforms all remaining metacharacters into literals, then puts the three interesting characters back, in their "regex'ed" form.
One other minor improvement is to allow for case-independence, per standard Windows behavior.
/// <summary>
/// Tests a file name against a wildcard mask, treating every character of the
/// mask literally except '*' (any run of characters) and '?' (any single character).
/// The comparison is anchored at both ends and ignores case.
/// </summary>
/// <param name="fileName">Name to test.</param>
/// <param name="fileMask">Wildcard mask; may contain any characters.</param>
/// <returns>True when the whole file name matches the mask.</returns>
private bool FitsMask(string fileName, string fileMask)
{
    // Escape first, then translate the escaped wildcards. Regex.Escape always
    // emits '*' and '?' as "\*" and "\?", so these two replacements can only hit
    // wildcards that came from the mask. This avoids textual placeholders, which
    // misfire when the mask itself contains the placeholder text.
    string pattern =
        "^" +
        Regex.Escape(fileMask)
            .Replace(@"\*", ".*")
            .Replace(@"\?", ".")
        + "$";
    return new Regex(pattern, RegexOptions.IgnoreCase).IsMatch(fileName);
}
2010.09.30 Update: Somewhere along the way, passion ensued...
I have been remiss in not updating this earlier but these references will likely be of interest to readers who have made it to this point:
I embedded the FitsMask method as the heart of a WinForms user control aptly called a FileMask--see the API here.
I then wrote an article featuring the FileMask control published on Simple-Talk.com, entitled Using LINQ Lambda Expressions to Design Customizable Generic Components. (While the method itself does not use LINQ, the FileMask user control does, hence the title of the article.)
Try this:
/// <summary>
/// Tests a file name against a wildcard mask by rewriting the mask as a regular
/// expression. The pattern is not anchored and is case-sensitive, and regex
/// metacharacters other than '.', '*' and '?' are passed through unescaped.
/// </summary>
/// <param name="sFileName">Name to test.</param>
/// <param name="sFileMask">Mask using '*' and '?' wildcards.</param>
/// <returns>True when the translated pattern matches anywhere in the file name.</returns>
private bool FitsMask(string sFileName, string sFileMask)
{
    string pattern = sFileMask
        .Replace(".", "[.]")
        .Replace("*", ".*")
        .Replace("?", ".");
    return Regex.IsMatch(sFileName, pattern);
}
Many people don't know that, but .NET includes an internal class, called "PatternMatcher" (under the "System.IO" namespace).
This static class contains only 1 method:
public static bool StrictMatchPattern(string expression, string name)
This method is used by .net whenever it needs to compare files with wildcard (FileSystemWatcher, GetFiles(), etc)
Using reflector, I exposed the code here.
Didn't really go through it to understand how it works, but it works great,
So this is the code for anyone who doesn't want to work with the inefficient RegEx way:
/// <summary>
/// Wildcard matcher for file names, reproduced from decompiled framework code.
/// The decompiler's local names are kept so the code can be compared with its origin.
/// </summary>
public static class PatternMatcher
{
    // Fields
    private const char ANSI_DOS_QM = '<';
    private const char ANSI_DOS_STAR = '>';
    private const char DOS_DOT = '"';
    private const int MATCHES_ARRAY_SIZE = 16;

    // Methods

    /// <summary>
    /// Tells whether <paramref name="name"/> matches the wildcard
    /// <paramref name="expression"/>. Both are lower-cased before comparing.
    /// </summary>
    /// <param name="expression">Pattern; '*' and '?' are wildcards, and '&lt;', '&gt;' and '"' get special handling.</param>
    /// <param name="name">Name to test.</param>
    /// <returns>True on a match; false on no match or when either argument is null or empty.</returns>
    public static bool StrictMatchPattern(string expression, string name)
    {
        // Guard before touching the strings: lower-casing a null argument would
        // throw instead of reaching the intended "return false".
        if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(expression))
        {
            return false;
        }
        expression = expression.ToLowerInvariant();
        name = name.ToLowerInvariant();
        int num9;
        char ch = '\0';
        char ch2 = '\0';
        // sourceArray holds the match states reached before the current name
        // character; numArray2 collects the states reached after it.
        int[] sourceArray = new int[MATCHES_ARRAY_SIZE];
        int[] numArray2 = new int[MATCHES_ARRAY_SIZE];
        bool flag = false; // set once every character of name has been consumed
        if (expression.Equals("*") || expression.Equals("*.*"))
        {
            return true;
        }
        // Fast path for "*suffix": a plain ends-with comparison.
        if ((expression[0] == '*') && (expression.IndexOf('*', 1) == -1))
        {
            int length = expression.Length - 1;
            if ((name.Length >= length) && (string.Compare(expression, 1, name, name.Length - length, length, StringComparison.OrdinalIgnoreCase) == 0))
            {
                return true;
            }
        }
        sourceArray[0] = 0;
        int num7 = 1;                       // number of valid entries in sourceArray
        int num = 0;                        // index of the next name character
        int num8 = expression.Length * 2;   // state value meaning "whole expression matched"
        while (!flag)
        {
            int num3;
            if (num < name.Length)
            {
                ch = name[num];
                num3 = 1;
                num++;
            }
            else
            {
                flag = true;
                if (sourceArray[num7 - 1] == num8)
                {
                    break;
                }
            }
            int index = 0;
            int num5 = 0;   // number of entries written to numArray2 so far
            int num6 = 0;
            while (index < num7)
            {
                int num2 = (sourceArray[index++] + 1) / 2;
                num3 = 0;
            Label_00F2:
                if (num2 != expression.Length)
                {
                    num2 += num3;
                    num9 = num2 * 2;
                    if (num2 == expression.Length)
                    {
                        numArray2[num5++] = num8;
                    }
                    else
                    {
                        ch2 = expression[num2];
                        num3 = 1;
                        // Grow both arrays only when fewer than two free slots
                        // remain. Comparing against the fixed value 14 doubled
                        // the arrays on every pass once 14 entries were in use.
                        if (num5 >= numArray2.Length - 2)
                        {
                            int num11 = numArray2.Length * 2;
                            int[] destinationArray = new int[num11];
                            Array.Copy(numArray2, destinationArray, numArray2.Length);
                            numArray2 = destinationArray;
                            destinationArray = new int[num11];
                            Array.Copy(sourceArray, destinationArray, sourceArray.Length);
                            sourceArray = destinationArray;
                        }
                        if (ch2 == '*')
                        {
                            numArray2[num5++] = num9;
                            numArray2[num5++] = num9 + 1;
                            goto Label_00F2;
                        }
                        if (ch2 == ANSI_DOS_STAR)
                        {
                            bool flag2 = false;
                            if (!flag && (ch == '.'))
                            {
                                // Look for another dot later in the name.
                                int num13 = name.Length;
                                for (int i = num; i < num13; i++)
                                {
                                    char ch3 = name[i];
                                    num3 = 1;
                                    if (ch3 == '.')
                                    {
                                        flag2 = true;
                                        break;
                                    }
                                }
                            }
                            if ((flag || (ch != '.')) || flag2)
                            {
                                numArray2[num5++] = num9;
                                numArray2[num5++] = num9 + 1;
                            }
                            else
                            {
                                numArray2[num5++] = num9 + 1;
                            }
                            goto Label_00F2;
                        }
                        num9 += num3 * 2;
                        switch (ch2)
                        {
                            case ANSI_DOS_QM:
                                if (flag || (ch == '.'))
                                {
                                    goto Label_00F2;
                                }
                                numArray2[num5++] = num9;
                                goto Label_028D;
                            case DOS_DOT:
                                if (flag)
                                {
                                    goto Label_00F2;
                                }
                                if (ch == '.')
                                {
                                    numArray2[num5++] = num9;
                                    goto Label_028D;
                                }
                                break;
                        }
                        if (!flag)
                        {
                            if (ch2 == '?')
                            {
                                numArray2[num5++] = num9;
                            }
                            else if (ch2 == ch)
                            {
                                numArray2[num5++] = num9;
                            }
                        }
                    }
                }
            Label_028D:
                // Skip source states that are smaller than states already produced.
                if ((index < num7) && (num6 < num5))
                {
                    while (num6 < num5)
                    {
                        int num14 = sourceArray.Length;
                        while ((index < num14) && (sourceArray[index] < numArray2[num6]))
                        {
                            index++;
                        }
                        num6++;
                    }
                }
            }
            if (num5 == 0)
            {
                // No state survived this character: the name cannot match.
                return false;
            }
            // The states just produced become the input for the next character.
            int[] numArray4 = sourceArray;
            sourceArray = numArray2;
            numArray2 = numArray4;
            num7 = num5;
        }
        num9 = sourceArray[num7 - 1];
        return (num9 == num8);
    }
}
None of these answers quite seem to do the trick, and msorens's is needlessly complex. This one should work just fine:
/// <summary>
/// Tests a file name against a wildcard mask. Every regex metacharacter in the
/// mask is escaped, then '*' becomes "any run of characters" and '?' becomes
/// "any single character". Matching is anchored and case-insensitive.
/// </summary>
/// <param name="fileName">Name to test.</param>
/// <param name="fileMask">Wildcard mask.</param>
/// <returns>True when the whole file name matches the mask.</returns>
public static Boolean MatchesMask(string fileName, string fileMask)
{
    string escapedMask = Regex.Escape(fileMask);
    string body = escapedMask.Replace(@"\*", ".*").Replace(@"\?", ".");
    return Regex.IsMatch(fileName, "^" + body + "$", RegexOptions.IgnoreCase);
}
This makes sure possible regex chars in the mask are escaped, replaces the \* and \?, and surrounds it all by ^ and $ to mark the boundaries.
Of course, in most situations, it's far more useful to simply make this into a FileMaskToRegex tool function which returns the Regex object, so you just got it once and can then make a loop in which you check all strings from your files list on it.
/// <summary>
/// Builds a reusable, case-insensitive, anchored <see cref="Regex"/> from a
/// wildcard mask: the mask is regex-escaped, then '*' matches any run of
/// characters and '?' any single character.
/// </summary>
/// <param name="fileMask">Wildcard mask.</param>
/// <returns>A regex that matches file names fitting the mask.</returns>
public static Regex FileMaskToRegex(string fileMask)
{
    string body = Regex.Escape(fileMask)
        .Replace(@"\*", ".*")
        .Replace(@"\?", ".");
    return new Regex(string.Concat("^", body, "$"), RegexOptions.IgnoreCase);
}
Use the WildcardPattern class from System.Management.Automation, available as a NuGet package or in the Windows PowerShell SDK.
// Build the pattern once from the mask, then test a file name against it.
WildcardPattern pattern = new WildcardPattern("my*.txt");
bool fits = pattern.IsMatch("myfile.txt");
From Windows 7 using P/Invoke (without 260 char count limit):
// UNICODE_STRING for Rtl... method
/// <summary>
/// Managed layout of the counted string structure passed to the Rtl... function.
/// <see cref="Length"/> and <see cref="MaximumLength"/> are set to the byte size
/// of the text (two bytes per character).
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
public struct UNICODE_STRING
{
    public ushort Length;
    public ushort MaximumLength;
    [MarshalAs(UnmanagedType.LPWStr)]
    string Buffer;

    /// <summary>Wraps <paramref name="buffer"/>; null produces a zero-length string.</summary>
    /// <param name="buffer">Text to wrap, at most 32767 characters.</param>
    /// <exception cref="ArgumentException">
    /// The text is too long for its byte length to fit the 16-bit length fields.
    /// </exception>
    public UNICODE_STRING(string buffer)
    {
        // The length fields are 16-bit byte counts. Casting a larger value with
        // unchecked() would silently wrap and describe only part of the string.
        const int MaxChars = ushort.MaxValue / 2;
        if (buffer != null && buffer.Length > MaxChars)
            throw new ArgumentException("String is too long for a UNICODE_STRING (maximum 32767 characters).", "buffer");

        if (buffer == null)
            Length = MaximumLength = 0;
        else
            Length = MaximumLength = (ushort)(buffer.Length * 2);
        Buffer = buffer;
    }
}
// RtlIsNameInExpression method from NtDll.dll system library
// P/Invoke declarations only; this class contains no managed logic.
public static class NtDll
{
// Both strings are passed by reference as UNICODE_STRING structures, and the
// native one-byte result is marshalled to bool.
// NOTE(review): presumably returns true when Name matches Expression - confirm
// against the Windows documentation for RtlIsNameInExpression.
[DllImport("NtDll.dll", CharSet=CharSet.Unicode, ExactSpelling=true)]
[return: MarshalAs(UnmanagedType.U1)]
public extern static bool RtlIsNameInExpression(
ref UNICODE_STRING Expression,
ref UNICODE_STRING Name,
[MarshalAs(UnmanagedType.U1)]
bool IgnoreCase,
// The caller in this file always passes IntPtr.Zero for this argument.
IntPtr Zero
);
}
/// <summary>
/// Tests a file name against a wildcard mask using the native
/// RtlIsNameInExpression routine, ignoring case.
/// </summary>
/// <param name="mask">Wildcard mask.</param>
/// <param name="fileName">Name to test.</param>
/// <returns>True when the native routine reports a match.</returns>
public bool MatchMask(string mask, string fileName)
{
    // Expression must be uppercase for IgnoreCase == true (see MSDN for RtlIsNameInExpression).
    // Use the invariant culture so the result does not depend on the current
    // culture's casing rules (e.g. the Turkish dotted/dotless i).
    UNICODE_STRING expr = new UNICODE_STRING(mask.ToUpperInvariant());
    UNICODE_STRING name = new UNICODE_STRING(fileName);
    // Return the outcome; a bool method with no return statement does not compile.
    return NtDll.RtlIsNameInExpression(ref expr, ref name, true, IntPtr.Zero);
}
Fastest version of the previously proposed function:
/// <summary>
/// Tests a file path against several wildcard masks at once.
/// </summary>
/// <param name="filePath">Path or name to test.</param>
/// <param name="fileMasks">Masks using '*' and '?'; a match on any one of them is enough.</param>
/// <returns>True when the whole of <paramref name="filePath"/> matches at least one mask.</returns>
public static bool FitsMasks(string filePath, params string[] fileMasks)
{
    return FileMasksToRegex(fileMasks).IsMatch(filePath);
}

/// <summary>
/// Builds, and caches for later calls, one case-insensitive regex that matches
/// a string fitting any of the given masks. Each mask gets its own capture
/// group, numbered in argument order.
/// </summary>
/// <param name="fileMasks">Masks using '*' and '?'; all other characters are literal.</param>
/// <returns>The cached or newly built regex.</returns>
public static Regex FileMasksToRegex(params string[] fileMasks)
{
    // Arrays compare by reference, so a string[] key would almost never hit the
    // cache. Key on the masks joined with '\0', which cannot occur in a file name.
    string cacheKey = string.Join("\0", fileMasks);

    Regex regex;
    if (!_maskRegexes.TryGetValue(cacheKey, out regex))
    {
        // The alternatives are wrapped in one group so that '^' and '$' apply
        // to every mask, not just the first and the last one.
        StringBuilder sb = new StringBuilder("^(?:");
        bool first = true;
        foreach (string fileMask in fileMasks)
        {
            if (first) first = false; else sb.Append('|');
            sb.Append('(');
            foreach (char c in fileMask)
            {
                switch (c)
                {
                    case '*': sb.Append(@".*"); break;
                    case '?': sb.Append(@"."); break;
                    default:
                        sb.Append(Regex.Escape(c.ToString()));
                        break;
                }
            }
            sb.Append(')');
        }
        sb.Append(")$");
        regex = new Regex(sb.ToString(), RegexOptions.IgnoreCase);
        _maskRegexes[cacheKey] = regex;
    }
    return regex;
}

// Compiled regexes keyed by the '\0'-joined mask list. Not synchronized.
static readonly Dictionary<string, Regex> _maskRegexes = new Dictionary<string, Regex>();
Notes:
Re-using Regex objects.
Using StringBuilder to optimize Regex creation (multiple .Replace() calls are slow).
Multiple masks, combined with OR.
Another version returning the Regex.
If PowerShell is available, it has direct support for wildcard type matching (as well as Regex).
// Compile the wildcard mask once; IsMatch can then be called for each file name.
WildcardPattern pat = new WildcardPattern("a*.b*");
if (pat.IsMatch(filename)) { ... }
I didn't want to copy the source code, and like @frankhommers I came up with a reflection-based solution.
Notice the code comment about the use of wildcards in the name argument I found in the reference source.
/// <summary>
/// Reflection wrapper around the framework's non-public
/// System.IO.PatternMatcher.StrictMatchPattern method.
/// </summary>
public static class PatternMatcher
{
    // Characters that must not appear in the name being tested.
    static readonly char[] wildcardChars = { '*', '?' };

    // Resolved once in the static constructor and never reassigned.
    static readonly MethodInfo strictMatchPatternMethod;

    static PatternMatcher()
    {
        var typeName = "System.IO.PatternMatcher";
        var methodName = "StrictMatchPattern";
        var assembly = typeof(Uri).Assembly;
        // throwOnError: true, so a missing type fails here rather than later.
        var type = assembly.GetType(typeName, true);
        strictMatchPatternMethod = type.GetMethod(methodName, BindingFlags.Static | BindingFlags.Public) ?? throw new MissingMethodException($"{typeName}.{methodName} not found");
    }

    /// <summary>
    /// Tells whether a given name matches the expression given with a strict (i.e. UNIX like) semantics.
    /// </summary>
    /// <param name="expression">Supplies the input expression to check against</param>
    /// <param name="name">Supplies the input name to check for.</param>
    /// <returns>The result reported by the framework method.</returns>
    /// <exception cref="FormatException"><paramref name="name"/> contains '*' or '?'.</exception>
    public static bool StrictMatchPattern(string expression, string name)
    {
        // https://referencesource.microsoft.com/#system/services/io/system/io/PatternMatcher.cs
        // If this class is ever exposed for generic use,
        // we need to make sure that name doesn't contain wildcards. Currently
        // the only component that calls this method is FileSystemWatcher and
        // it will never pass a name that contains a wildcard.
        // Reject both wildcard characters, not only '*'. IndexOfAny needs no
        // LINQ import; a null name is handed on to the framework method.
        if (name != null && name.IndexOfAny(wildcardChars) >= 0) throw new FormatException("Wildcard not allowed");
        return (bool)strictMatchPatternMethod.Invoke(null, new object[] { expression, name });
    }
}
For .NET Core, this is the way Microsoft does it:
/// <summary>
/// Decides whether the file-name part of <paramref name="relativePath"/> passes
/// the configured filters.
/// </summary>
/// <param name="relativePath">Path whose file name is tested.</param>
/// <returns>
/// False when the path has no file name; true when there are no filters or
/// when at least one filter matches the name; otherwise false.
/// </returns>
private bool MatchPattern(ReadOnlySpan<char> relativePath)
{
    ReadOnlySpan<char> name = IO.Path.GetFileName(relativePath);
    if (name.Length == 0)
    {
        return false;
    }

    if (Filters.Count == 0)
    {
        return true;
    }

    bool matched = false;
    foreach (string filter in Filters)
    {
        if (FileSystemName.MatchesSimpleExpression(filter, name, ignoreCase: !PathInternal.IsCaseSensitive))
        {
            matched = true;
            break;
        }
    }
    return matched;
}
The way Microsoft itself appears to do it for .NET 4.6 is documented on GitHub:
/// <summary>
/// Tests the file-name part of <paramref name="relativePath"/> against the
/// <c>filter</c> field, comparing invariant-culture upper-cased copies of both.
/// </summary>
/// <param name="relativePath">Path whose file name is tested.</param>
/// <returns>False when no file name can be extracted; otherwise the matcher's result.</returns>
private bool MatchPattern(string relativePath) {
    string name = System.IO.Path.GetFileName(relativePath);
    if (name == null) {
        return false;
    }
    return PatternMatcher.StrictMatchPattern(
        filter.ToUpper(CultureInfo.InvariantCulture),
        name.ToUpper(CultureInfo.InvariantCulture));
}
My version, which supports ** wild card:
/// <summary>
/// Converts a path mask into a case-insensitive regex. '**' matches any text
/// including directory separators, '*' any run of characters within one path
/// segment, '?' one character within a segment, and '/' or '\' either
/// separator. The mask may match after any leading directory prefix.
/// </summary>
/// <param name="mask">Path mask to convert.</param>
/// <returns>A regex anchored at the end of the path.</returns>
static Regex FileMask2Regex(string mask)
{
    var sb = new StringBuilder(mask);
    // hide wildcards behind unique tokens so Regex.Escape leaves them alone
    sb.Replace("**", "affefa0d52e84c2db78f5510117471aa-StarStar");
    sb.Replace("*", "affefa0d52e84c2db78f5510117471aa-Star");
    sb.Replace("?", "affefa0d52e84c2db78f5510117471aa-Question");
    sb.Replace("/", "affefa0d52e84c2db78f5510117471aa-Slash");
    sb.Replace("\\", "affefa0d52e84c2db78f5510117471aa-Slash");
    sb = new StringBuilder(Regex.Escape(sb.ToString()));
    // unhide wildcards; the patterns are verbatim (@"...") strings so the
    // backslashes reach the regex engine unchanged
    sb.Replace("affefa0d52e84c2db78f5510117471aa-StarStar", @".*");
    sb.Replace("affefa0d52e84c2db78f5510117471aa-Star", @"[^/\\]*");
    sb.Replace("affefa0d52e84c2db78f5510117471aa-Question", @"[^/\\]");
    sb.Replace("affefa0d52e84c2db78f5510117471aa-Slash", @"[/\\]");
    sb.Append("$");
    // allowed to have prefix
    sb.Insert(0, @"^(?:.*?[/\\])?");
    return new Regex(sb.ToString(), RegexOptions.IgnoreCase);
}
How about using reflection to get access to the function in the .NET framework?
Like this:
/// <summary>
/// Exposes the framework's System.IO.PatternMatcher.StrictMatchPattern method
/// through a delegate that is resolved by reflection.
/// </summary>
public class PatternMatcher
{
    /// <summary>Signature of the wrapped matching method.</summary>
    public delegate bool StrictMatchPatternDelegate(string expression, string name);

    /// <summary>Invokes the reflected method with the given expression and name.</summary>
    public StrictMatchPatternDelegate StrictMatchPattern;

    /// <summary>
    /// Looks up the method in the assembly that defines FileSystemWatcher and
    /// stores a delegate that forwards to it.
    /// </summary>
    public PatternMatcher()
    {
        var flags = System.Reflection.BindingFlags.Static | System.Reflection.BindingFlags.Public;
        MethodInfo target = typeof(FileSystemWatcher).Assembly
            .GetType("System.IO.PatternMatcher")
            .GetMethod("StrictMatchPattern", flags);

        StrictMatchPattern = delegate (string expression, string name)
        {
            object outcome = target.Invoke(null, new object[] { expression, name });
            return (bool)outcome;
        };
    }
}
// Demo: evaluates two masks against the same file name and prints each result.
void Main()
{
    var matcher = new PatternMatcher();

    bool txtResult = matcher.StrictMatchPattern("*.txt", "test.txt");
    Console.WriteLine(txtResult); //displays true

    bool docResult = matcher.StrictMatchPattern("*.doc", "test.txt");
    Console.WriteLine(docResult); //displays false
}

Categories

Resources