I am using the following C# code to modify a lowercase letter to uppercase after a single quote:
/// <summary>
/// Gets or sets the first name. On assignment, the character immediately
/// following the first apostrophe is upper-cased
/// (for example "Mc'donald" is stored as "Mc'Donald").
/// </summary>
public virtual string FirstName
{
    get { return _firstName; }
    set
    {
        if (value != null)
        {
            int pos = value.IndexOf('\'');
            // Only capitalise when a character actually follows the apostrophe.
            if (pos >= 0 && pos < value.Length - 1)
            {
                char[] chars = value.ToCharArray();
                chars[pos + 1] = char.ToUpper(chars[pos + 1]);
                value = new string(chars);
            }
        }
        // The transformed value must be written to the backing field;
        // without this assignment the setter has no effect at all.
        _firstName = value;
    }
}
To me this looks like overkill. Is there an easier way to achieve the desired result:
Value: Mc'donald
Expected: Mc'Donald
Here is a version without regex:
// Upper-case the character that follows the first apostrophe in data, if any.
int quoteIndex = data.IndexOf("'");
bool hasFollowingChar = quoteIndex >= 0 && quoteIndex < data.Length - 1;
if (hasFollowingChar)
{
    StringBuilder buffer = new StringBuilder(data);
    char following = buffer[quoteIndex + 1];
    buffer[quoteIndex + 1] = char.ToUpper(following);
    data = buffer.ToString();
}
Since you're open to Regex, would this overload of the Regex.Replace do what you need?
Regex.Replace Method (String, MatchEvaluator)
Here's a modified version of the example given at the link above. I've changed it to use the '\w pattern and to return the match in upper case.
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Demonstrates Regex.Replace with a MatchEvaluator: every apostrophe that is
/// followed by a word character is replaced by its upper-cased form.
/// </summary>
class RegExSample
{
    /// <summary>Match evaluator that returns the matched text in upper case.</summary>
    static string CapText(Match m)
    {
        // Return the match in upper case
        return m.ToString().ToUpperInvariant();
    }

    static void Main()
    {
        string text = "Mc'donald";
        System.Console.WriteLine("text=[" + text + "]");
        // Verbatim literal (@"...") so that \w reaches the regex engine as written;
        // a '#' prefix is not valid C#.
        Regex rx = new Regex(@"'\w");
        string result = rx.Replace(text, new MatchEvaluator(RegExSample.CapText));
        System.Console.WriteLine("result=[" + result + "]");
    }
}
Perhaps regular expressions?
string value = "Mc'donald";
// Regex.Replace returns the input unchanged when nothing matches. The earlier
// Match(...).Value + String.Replace combination passed an empty string to
// String.Replace in that case, which throws ArgumentException.
// Every apostrophe followed by a word character is upper-cased.
string result = Regex.Replace(value, "'[\\w]", m => m.Value.ToUpper());
Console.WriteLine(result); // Mc'Donald
Related
I need to be able to extract a string between two tags, for example: "0002" from "morenonxmldata<tag1>0002</tag1>morenonxmldata"
I am using C# and .NET 3.5.
// Capture whatever sits between <tag1> and </tag1> (greedy) and read group 1.
const string input = "morenonxmldata<tag1>0002</tag1>morenonxmldata";
Match tagMatch = Regex.Match(input, "<tag1>(.*)</tag1>");
string s = tagMatch.Groups[1].ToString();
Or (as mentioned in the comments) to match the minimal subset:
// Lazy quantifier (.*?): the capture stops at the first closing tag it meets
// instead of extending to the last one in the input.
Regex regex = new Regex("<tag1>(.*?)</tag1>");
Regex class is in System.Text.RegularExpressions namespace.
Solution without need of regular expression:
/// <summary>
/// Returns the text between the first &lt;tag&gt; and the following &lt;/tag&gt; in <paramref name="s"/>.
/// </summary>
/// <param name="s">Text to search.</param>
/// <param name="tag">Tag name without angle brackets.</param>
/// <returns>
/// The enclosed text, or an empty string when the input is null/empty or
/// either tag is missing.
/// </returns>
string ExtractString(string s, string tag)
{
    if (string.IsNullOrEmpty(s) || string.IsNullOrEmpty(tag))
    {
        return string.Empty;
    }

    string startTag = "<" + tag + ">";
    string endTag = "</" + tag + ">";

    // Ordinal comparison: tags are markup, not linguistic text.
    int tagIndex = s.IndexOf(startTag, System.StringComparison.Ordinal);
    if (tagIndex < 0)
    {
        // Without this check -1 + startTag.Length would silently point into the text.
        return string.Empty;
    }

    int startIndex = tagIndex + startTag.Length;
    int endIndex = s.IndexOf(endTag, startIndex, System.StringComparison.Ordinal);
    if (endIndex < 0)
    {
        return string.Empty;
    }

    return s.Substring(startIndex, endIndex - startIndex);
}
A Regex approach using lazy match and back-reference:
// Group 1 captures the tag name and \1 requires the closing tag to repeat it;
// group 2 lazily captures the enclosed text.
// The pattern is a verbatim literal (@"...") so the backslash is passed through.
foreach (Match match in Regex.Matches(
    "morenonxmldata<tag1>0002</tag1>morenonxmldata<tag2>abc</tag2>asd",
    @"<([^>]+)>(.*?)</\1>"))
{
    Console.WriteLine("{0}={1}",
        match.Groups[1].Value,
        match.Groups[2].Value);
}
Extracting the contents between two known values can be useful later as well, so why not create an extension method for it? Here is what I do; it is short and simple:
/// <summary>
/// Returns the text located between the first occurrence of
/// <paramref name="startString"/> and the next occurrence of
/// <paramref name="endString"/> after it.
/// </summary>
/// <returns>
/// The enclosed text, or an empty string when any argument is null or when
/// either marker cannot be found in that order.
/// </returns>
public static string GetBetween(this string content, string startString, string endString)
{
    if (content == null || startString == null || endString == null)
    {
        return string.Empty;
    }

    int markerIndex = content.IndexOf(startString, System.StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        return string.Empty;
    }

    int start = markerIndex + startString.Length;
    // The end marker must be looked up AFTER the start marker; merely knowing that
    // content contains it somewhere is not enough and used to produce a negative length.
    int end = content.IndexOf(endString, start, System.StringComparison.Ordinal);
    if (end < 0)
    {
        return string.Empty;
    }

    return content.Substring(start, end - start);
}
string input = "Exemple of value between two string FirstString text I want to keep SecondString end of my string";
// Verbatim literal (@"..."); group 1 lazily captures the text between the two markers.
var match = Regex.Match(input, @"FirstString (.+?) SecondString ").Groups[1].Value;
To get Single/Multiple values without regular expression
// String.Split has no (string, string) overload; string separators go in an array
// together with a StringSplitOptions value. None keeps empty entries, which the
// odd-index selection below depends on.
// For Single
var value = inputString.Split(new[] { "<tag1>", "</tag1>" }, System.StringSplitOptions.None)[1];
// For Multiple: every odd-indexed piece lies between an opening and a closing tag
var values = inputString.Split(new[] { "<tag1>", "</tag1>" }, System.StringSplitOptions.None).Where((_, index) => index % 2 != 0);
For future reference, I found this code snippet at http://www.mycsharpcorner.com/Post.aspx?postID=15 If you need to search for different "tags" it works very well.
/// <summary>
/// Finds the first segment of <paramref name="strSource"/> delimited by
/// <paramref name="strBegin"/> and <paramref name="strEnd"/>.
/// </summary>
/// <param name="includeBegin">When true the begin delimiter is part of the segment.</param>
/// <param name="includeEnd">When true the end delimiter is part of the segment.</param>
/// <returns>
/// A two-element array: [0] is the segment ("" when not found), [1] is the text
/// that follows the end delimiter. When the begin delimiter is absent, [1] is the
/// whole source; when only the end delimiter is absent both elements are "".
/// </returns>
public static string[] GetStringInBetween(string strBegin,
    string strEnd, string strSource,
    bool includeBegin, bool includeEnd)
{
    string[] result = { "", "" };
    if (string.IsNullOrEmpty(strSource) || strBegin == null || strEnd == null)
    {
        result[1] = strSource ?? "";
        return result;
    }

    int beginIndex = strSource.IndexOf(strBegin, System.StringComparison.Ordinal);
    if (beginIndex == -1)
    {
        // stay where we are
        result[1] = strSource;
        return result;
    }

    // Always search for the end delimiter after the begin delimiter, so that
    // identical delimiters (e.g. quotes) are not matched against themselves.
    int contentStart = beginIndex + strBegin.Length;
    int endIndex = strSource.IndexOf(strEnd, contentStart, System.StringComparison.Ordinal);
    if (endIndex == -1)
    {
        return result;
    }

    int segmentStart = includeBegin ? beginIndex : contentStart;
    int segmentEnd = includeEnd ? endIndex + strEnd.Length : endIndex;
    result[0] = strSource.Substring(segmentStart, segmentEnd - segmentStart);

    // advance beyond this segment: the remainder starts right after the end
    // delimiter regardless of includeEnd (it used to skip strEnd.Length extra
    // characters when includeEnd was true).
    int remainderStart = endIndex + strEnd.Length;
    if (remainderStart < strSource.Length)
    {
        result[1] = strSource.Substring(remainderStart);
    }

    return result;
}
I strip before and after data.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
namespace testApp
{
    /// <summary>
    /// Console demo: isolates the text inside tag1 by deleting everything
    /// around it with two regex replacements.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            string payload = "morenonxmldata<tag1>0002</tag1>morenonxmldata";

            // Remove everything up to and including the opening tag.
            string withoutLeading = Regex.Replace(payload, "[\\s\\S]*<tag1>", "");

            // Remove the closing tag and everything after it.
            string tagContent = Regex.Replace(withoutLeading, "</tag1>[\\s\\S]*", "");

            Console.WriteLine(tagContent);
            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }
    }
}
Without RegEx, with some must-have value checking
/// <summary>
/// Returns the text between &lt;tag&gt; and &lt;/tag&gt; in <paramref name="soapMessage"/>.
/// A missing opening tag makes the result start at the beginning of the message;
/// a missing closing tag makes it run to the end. Null or empty input is returned as is.
/// </summary>
public static string ExtractString(string soapMessage, string tag)
{
    if (string.IsNullOrEmpty(soapMessage))
    {
        return soapMessage;
    }

    string openingTag = "<" + tag + ">";
    string closingTag = "</" + tag + ">";

    int contentStart = 0;
    int openingAt = soapMessage.IndexOf(openingTag);
    if (openingAt != -1)
    {
        contentStart = openingAt + openingTag.Length;
    }

    int contentEnd = soapMessage.IndexOf(closingTag, contentStart);
    if (contentEnd == -1)
    {
        contentEnd = soapMessage.Length;
    }

    return soapMessage.Substring(contentStart, contentEnd - contentStart);
}
/// <summary>
/// Returns the part of <paramref name="line"/> that follows the first
/// <paramref name="delimiterFirst"/> and precedes the next
/// <paramref name="delimiterLast"/>.
/// </summary>
/// <returns>
/// The enclosed text; the rest of the line when <paramref name="delimiterLast"/>
/// does not occur after the first delimiter; an empty string when the line is
/// null/empty or <paramref name="delimiterFirst"/> is missing.
/// </returns>
public string between2finer(string line, string delimiterFirst, string delimiterLast)
{
    if (string.IsNullOrEmpty(line) || string.IsNullOrEmpty(delimiterFirst))
    {
        return string.Empty;
    }

    // Index-based lookup instead of Split(..., RemoveEmptyEntries): dropping empty
    // entries shifted the array when the line began with the delimiter, and
    // indexing element 1 threw when the delimiter was absent.
    int firstAt = line.IndexOf(delimiterFirst, System.StringComparison.Ordinal);
    if (firstAt < 0)
    {
        return string.Empty;
    }

    int start = firstAt + delimiterFirst.Length;
    int end = string.IsNullOrEmpty(delimiterLast)
        ? -1
        : line.IndexOf(delimiterLast, start, System.StringComparison.Ordinal);

    return end < 0 ? line.Substring(start) : line.Substring(start, end - start);
}
/// <summary>
/// Click handler: extracts the value of the X-Failed-Recipients header from a
/// sample bounce message and shows it in a message box.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    const string headerPrefix = "X-Failed-Recipients: ";
    const string lineBreak = "\n";
    string bounceMessage = "Received: from exim by isp2.ihc.ru with local (Exim 4.77) \nX-Failed-Recipients: rmnokixm#gmail.com\nFrom: Mail Delivery System <Mailer-Daemon#isp2.ihc.ru>";

    string failedRecipient = between2finer(bounceMessage, headerPrefix, lineBreak);
    MessageBox.Show(failedRecipient);
}
Is there a simple way for masking E-Mail addresses using Regular Expressions in C#?
My E-Mail:
myawesomeuser#there.com
My goal:
**awesome****#there.com (when 'awesome' was part of the pattern)
So it's more like an inverted replacement where everything that does not actually match will be replaced with *.
Note: The domain should never be replaced!
From a performance point of view, would it make more sense to split at the # and only check the first part, then put it back together afterwards?
Note: I don't want to check if the E-Mail is valid or not. It's just a simple inverted replacement and only for my current needs, the string is an E-Mail but for sure it can be any other string as well.
Solution
After reading the comments I ended up with an extension-method for strings which perfectly matches my needs.
/// <summary>
/// Masks the part of <paramref name="eMail"/> that precedes the '#' separator
/// with '*', leaving the first occurrence of <paramref name="pattern"/> readable.
/// Everything from the separator onwards is returned unchanged.
/// </summary>
/// <param name="eMail">Address to mask; null or empty input is returned as is.</param>
/// <param name="pattern">Text to keep visible; null or empty keeps nothing visible.</param>
/// <returns>
/// The masked address. When the pattern is absent, or does not lie entirely
/// before the separator, the whole leading part is masked.
/// </returns>
public static string MaskEmail(this string eMail, string pattern)
{
    if (string.IsNullOrEmpty(eMail))
    {
        return eMail;
    }

    // NOTE(review): the sample addresses in this document use '#' as the separator
    // where a real address would normally have an at-sign - confirm the intended character.
    var ix2 = eMail.IndexOf('#');
    // Corner case no-#
    if (ix2 == -1)
    {
        ix2 = eMail.Length;
    }

    var ix1 = string.IsNullOrEmpty(pattern)
        ? -1
        : eMail.IndexOf(pattern, StringComparison.Ordinal);

    // The pattern must END before the separator as well as start before it;
    // otherwise the trailing star count below would be negative and throw.
    bool patternBeforeSeparator = ix1 != -1 && ix1 + pattern.Length <= ix2;
    if (!patternBeforeSeparator)
    {
        // corner case no str found, all the pre-# is replaced
        return new string('*', ix2) + eMail.Substring(ix2);
    }

    return new string('*', ix1)
        + pattern
        + new string('*', ix2 - ix1 - pattern.Length)
        + eMail.Substring(ix2);
}
which then can be called
// The address is a string literal and needs its quotes.
string eMail = "myawesomeuser#there.com";
string maskedMail = eMail.MaskEmail("awesome"); // **awesome****#there.com
string email = "myawesomeuser#there.com";
string str = "awesome";
string escaped = Regex.Escape(str);
// Alternative 1: from the start of the string, consume characters until str or '#' begins.
string leadingPart = "^((?!" + escaped + "|#).)*";
// Alternative 2: with no '#' earlier in the string and directly after str,
// consume characters until '#' begins.
string trailingPart = "(?<!#.*)(?<=" + escaped + ")((?!#).)*";
string rx = leadingPart + "|" + trailingPart;
// Each match is replaced by a run of '*' of the same length.
string email2 = Regex.Replace(email, rx, x => new string('*', x.Length));
There are two sub-regular expressions here:
^((?!" + Regex.Escape(str) + "|#).)*
and
(?<!#.*)(?<=" + Regex.Escape(str) + ")((?!#).)*
They are in | (or)
The first one means: from the start of the string, any character but stop when you find str (escaped) or #
The second one means: there mustn't be a # before the start of this matching and, starting from str (escaped), replace any character stopping at the #
Probably faster/easier to read:
string email = "myawesomeuser#there.com";
string str = "awesome";
int ix1 = email.IndexOf(str);
int ix2 = email.IndexOf('#');
// Corner case no-#
if (ix2 == -1) {
    ix2 = email.Length;
}
string email3;
// str has to lie entirely before '#': checking only its start would make the
// second star count negative (ArgumentOutOfRangeException) when str runs past '#'.
if (ix1 != -1 && ix1 + str.Length <= ix2) {
    email3 = new string('*', ix1) + str + new string('*', ix2 - ix1 - str.Length) + email.Substring(ix2);
} else {
    // corner case no str found, all the pre-# is replaced
    email3 = new string('*', ix2) + email.Substring(ix2);
}
This second version is better because it handles corner cases such as the string not being found and the e-mail having no domain.
(awesome)|.(?=.*#)
Try this. Replace by *$1. But there will be an extra * at the start, so remove one * from the start of the masked email. See demo.
https://regex101.com/r/wU7sQ0/29
Non RE;
string name = "awesome";
int pat = email.IndexOf('#');
int pname = email.IndexOf(name);
// Mask only when both '#' and name were found and name ends before '#';
// a -1 from either IndexOf would otherwise reach the String constructor or Insert and throw.
if (pat >= 0 && pname >= 0 && pname + name.Length <= pat)
    email = new String('*', pat - name.Length).Insert(pname, name) + email.Substring(pat);
I have a string that starts with a date, and I want to split it into the date and the remaining text.
For example :
I have this type of strings data
9/23/2013/marking abandoned based on notes below/DB
12/8/2012/I think the thid is string/SG
and i want to make it like as
9/23/2013 marking abandoned based on notes below/DB
12/8/2012 I think the thid is string/SG
So, I don't know how to split these strings and store the parts in different columns of a table.
Please help me.
string[] vals = { "9/23/2013/marking abandoned based on notes below/DB",
                  "12/8/2012/I think the thid is string/SG" };
// Verbatim literal (@"...") so the \d escapes reach the regex engine.
// Group 1 is the date, group 2 everything after the slash that follows it.
var regex = @"(\d{1,2}/\d{1,2}/\d{4})/(.*)";
// Match each element (val), not the array itself.
var matches = vals.Select(val => Regex.Match(val, regex));
foreach (var match in matches)
{
    Console.WriteLine("{0} {1}", match.Groups[1], match.Groups[2]);
}
prints:
9/23/2013 marking abandoned based on notes below/DB
12/8/2012 I think the thid is string/SG
(\d{1,2}/\d{1,2}/\d{4})/(.*) breaks down to
(\d{1,2}/\d{1,2}/\d{4}):
\d{1,2} - matches any one or two digit number
/ - matches to one / symbol
\d{4} - matches to four digit number
(...) - denotes first group
(.*) - matches everything else and creates second group
Another way to do it with LINQ:
var inputs = new[]{
    "9/23/2013/marking abandoned based on notes below/DB",
    "12/8/2012/I think the thid is string/SG"
};
foreach (var item in inputs)
{
    // The first three pieces form the date and are re-joined with '/';
    // any remaining pieces are re-joined with '/' and appended after a tab.
    string[] pieces = item.Split('/');
    string r = string.Join("/", pieces.Take(3).ToArray());
    if (pieces.Length > 3)
    {
        r += "\t" + string.Join("/", pieces.Skip(3).ToArray());
    }
    Console.WriteLine(r);
}
Or you may use the IndexOf and Substring methods:
foreach (var item in inputs)
{
    // Walk to the third '/', stopping as soon as one is missing. Nesting the
    // IndexOf calls as "1 + IndexOf(...)" turned a -1 into start index 0, so an
    // item with fewer than three slashes was split at the wrong slash.
    int lastPos = -1;
    for (int found = 0; found < 3; found++)
    {
        lastPos = item.IndexOf('/', lastPos + 1);
        if (lastPos == -1)
        {
            break;
        }
    }

    if (lastPos != -1)
    {
        var r = String.Join("\t",
            item.Substring(0, lastPos),
            item.Substring(lastPos + 1));
        Console.WriteLine(r);
    }
}
Perhaps with pure string methods, the third slash separates the date and the text:
string line = "9/23/2013/marking abandoned based on notes below/DB";
// Locate the third '/'; it separates the date from the text.
int slashIndex = -1;
int slashCount = 0;
do
{
    slashIndex = line.IndexOf('/', slashIndex + 1);
    if (slashIndex < 0)
    {
        break;
    }
    slashCount++;
} while (slashCount < 3);

if (slashCount == 3)
{
    string datePart = line.Substring(0, slashIndex);
    string textPart = line.Substring(slashIndex + 1);
    Console.WriteLine("Date:{0} Text: {1}", datePart, textPart);
}
For what it's worth, here is an extension method to "break" a string in half on the nth occurrence of a string:
public static class StringExtensions
{
    /// <summary>
    /// Splits <paramref name="input"/> in two at the <paramref name="breakOn"/>-th
    /// occurrence of <paramref name="value"/>.
    /// </summary>
    /// <param name="input">String to split.</param>
    /// <param name="value">Separator to look for; null falls back to a single space.</param>
    /// <param name="breakOn">1-based occurrence at which to split.</param>
    /// <param name="comparison">Comparison used for every search.</param>
    /// <returns>
    /// Two elements (text before and after that occurrence, separator excluded),
    /// or a single element holding the input when there are fewer occurrences.
    /// </returns>
    /// <exception cref="ArgumentException">When breakOn is zero or negative.</exception>
    public static string[] BreakOnNthIndexOf(this string input, string value, int breakOn, StringComparison comparison)
    {
        if (breakOn <= 0)
        {
            throw new ArgumentException("breakOn must be greater than 0", "breakOn");
        }

        string separator = value ?? " "; // fallback on white-space
        int matchIndex = input.IndexOf(separator, comparison);

        for (int occurrence = 1; matchIndex >= 0; occurrence++)
        {
            if (occurrence == breakOn)
            {
                string head = input.Substring(0, matchIndex);
                string tail = input.Substring(matchIndex + separator.Length);
                return new[] { head, tail };
            }

            // Continue searching right after the occurrence just found.
            matchIndex = input.IndexOf(separator, matchIndex + separator.Length, comparison);
        }

        return new[] { input };
    }
}
Use it in this way:
// Split each sample line at its third "/" (date on the left, text on the right).
const string separator = "/";
const int occurrence = 3;
string line1 = "9/23/2013/marking abandoned based on notes below/DB";
string line2 = "12/8/2012/I think the thid is string/SG";
string[] res1 = StringExtensions.BreakOnNthIndexOf(line1, separator, occurrence, StringComparison.OrdinalIgnoreCase);
string[] res2 = StringExtensions.BreakOnNthIndexOf(line2, separator, occurrence, StringComparison.OrdinalIgnoreCase);
I have like a three word expression: "Shut The Door" and I want to find it in a sentence. Since They are kind of seperated by space what would be the best solution for it.
If you have the string:
string sample = "If you know what's good for you, you'll shut the door!";
And you want to find where it is in a sentence, you can use the IndexOf method.
int index = sample.IndexOf("shut the door");
// index will be 40 (zero-based position of the 's' in "shut" within the sample sentence)
A non -1 answer means the string has been located. -1 means it does not exist in the string. Please note that the search string ("shut the door") is case sensitive.
Use the built-in Regex.Match method for matching strings.
string text = "One car red car blue car";
// Verbatim literal (@"...") so \w and \s reach the regex engine unchanged.
// Group 1 is the word before "car", group 2 is "car" itself.
string pat = @"(\w+)\s+(car)";
// Compile the regular expression.
Regex r = new Regex(pat, RegexOptions.IgnoreCase);
// Match the regular expression pattern against a text string.
Match m = r.Match(text);
int matchCount = 0;
while (m.Success)
{
    Console.WriteLine("Match" + (++matchCount));
    // Groups 1 and 2 are the two capturing groups of the pattern.
    for (int i = 1; i <= 2; i++)
    {
        Group g = m.Groups[i];
        Console.WriteLine("Group" + i + "='" + g + "'");
        CaptureCollection cc = g.Captures;
        for (int j = 0; j < cc.Count; j++)
        {
            Capture c = cc[j];
            System.Console.WriteLine("Capture" + j + "='" + c + "', Position=" + c.Index);
        }
    }
    m = m.NextMatch();
}
http://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex.match(v=vs.71).aspx
http://support.microsoft.com/kb/308252
// The method is IndexOf (capital I); C# is case-sensitive.
// A result of 0 or more means string2 occurs in string1.
if (string1.IndexOf(string2) >= 0)
...
The spaces are nothing special; they are just characters, so you can find a string like this the same way you would find any other string in your sentence, for example using "IndexOf" if you need the position, or just "Contains" if you only need to know whether it exists.
E.g.
// Spaces are ordinary characters, so a multi-word phrase is found like any other substring.
string sentence = "foo bar baz";
string phrase = "bar baz";
bool phraseFound = sentence.Contains(phrase);
Console.WriteLine(phraseFound); // True
Here is some C# code to find a substrings using a start string and end string point but you can use as a base and modify (i.e. remove need for end string) to just find your string...
2 versions, one to just find the first instance of a substring, other returns a dictionary of all starting positions of the substring and the actual string.
/// <summary>
/// Finds every segment of <paramref name="source"/> that is delimited by
/// <paramref name="start"/> and the next <paramref name="end"/> after it.
/// </summary>
/// <param name="start">Opening delimiter.</param>
/// <param name="end">Closing delimiter.</param>
/// <param name="source">Text to search.</param>
/// <param name="includeStartEnd">When true the delimiters are part of each returned segment.</param>
/// <param name="caseInsensitive">When true the delimiters are matched ignoring case.</param>
/// <returns>
/// A dictionary mapping the index at which each returned segment begins in
/// <paramref name="source"/> to the segment text, in its original casing.
/// Empty when any argument is null or empty or when nothing is found.
/// </returns>
public Dictionary<int, string> GetSubstringDic(string start, string end, string source, bool includeStartEnd, bool caseInsensitive)
{
    Dictionary<int, string> result = new Dictionary<int, string>();
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(start) || string.IsNullOrEmpty(end))
    {
        return result;
    }

    // Compare case-insensitively instead of lower-casing the inputs: lower-casing
    // altered the text that was returned and can change string lengths.
    StringComparison comparison = caseInsensitive
        ? StringComparison.OrdinalIgnoreCase
        : StringComparison.Ordinal;

    int startIndex = source.IndexOf(start, comparison);
    while (startIndex != -1)
    {
        // Look for the end delimiter only after the complete start delimiter,
        // so the two can never overlap and lengths are never negative.
        int contentStart = startIndex + start.Length;
        int endIndex = source.IndexOf(end, contentStart, comparison);
        if (endIndex == -1)
        {
            //no end so break out of while;
            break;
        }

        if (includeStartEnd)
        {
            result.Add(startIndex, source.Substring(startIndex, endIndex + end.Length - startIndex));
        }
        else
        {
            result.Add(contentStart, source.Substring(contentStart, endIndex - contentStart));
        }

        //move start position up, past the end delimiter just consumed
        startIndex = source.IndexOf(start, endIndex + end.Length, comparison);
    }

    return result;
}
/// <summary>
/// Returns the first segment of <paramref name="source"/> delimited by
/// <paramref name="start"/> and the next <paramref name="end"/> after it.
/// </summary>
/// <param name="start">Opening delimiter.</param>
/// <param name="end">Closing delimiter.</param>
/// <param name="source">Text to search.</param>
/// <param name="includeStartEnd">When true the delimiters are part of the returned segment.</param>
/// <param name="caseInsensitive">When true the delimiters are matched ignoring case.</param>
/// <returns>
/// The segment in its original casing, or an empty string when any argument is
/// null or empty or when either delimiter is not found.
/// </returns>
public string GetSubstring(string start, string end, string source, bool includeStartEnd, bool caseInsensitive)
{
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(start) || string.IsNullOrEmpty(end))
    {
        return string.Empty;
    }

    // Compare case-insensitively instead of lower-casing the inputs, which
    // altered the text that was returned.
    StringComparison comparison = caseInsensitive
        ? StringComparison.OrdinalIgnoreCase
        : StringComparison.Ordinal;

    int startIndex = source.IndexOf(start, comparison);
    if (startIndex == -1)
    {
        return string.Empty;
    }

    // Search for the end delimiter after the complete start delimiter so that
    // the two cannot overlap.
    int contentStart = startIndex + start.Length;
    int endIndex = source.IndexOf(end, contentStart, comparison);
    if (endIndex == -1)
    {
        return string.Empty;
    }

    return includeStartEnd
        ? source.Substring(startIndex, endIndex + end.Length - startIndex)
        : source.Substring(contentStart, endIndex - contentStart);
}
You may want to make sure the check ignores the case of both phrases.
string theSentence = "I really want you to shut the door.";
string thePhrase = "Shut The Door";
// One case-insensitive search answers both questions. Upper-casing both strings
// just to compare them allocates copies and could disagree with the IndexOf
// result, because the two calls would use different comparison rules.
int phraseStartsAt = theSentence.IndexOf(
    thePhrase,
    StringComparison.InvariantCultureIgnoreCase);
bool phraseIsPresent = phraseStartsAt >= 0;
Console.WriteLine("Is the phrase present? " + phraseIsPresent);
Console.WriteLine("The phrase starts at character: " + phraseStartsAt);
This outputs:
Is the phrase present? True
The phrase starts at character: 21
I have a search method that takes in a user-entered string, splits it at each space character and then proceeds to find matches based on the list of separated terms:
// Lower-case and trim the user input, then split it at every single space.
string normalizedSearch = searchTerms.ToLower().Trim();
string[] terms = normalizedSearch.Split(' ');
Now I have been given a further requirement: to be able to search for phrases via double quote delimiters a la Google. So if the search terms provided were:
"a line of" text
The search would match occurrences of "a line of" and "text" rather than the four separate terms [the open and closing double quotes would also need to be removed before searching].
How can I achieve this in C#? I would assume regular expressions would be the way to go, but haven't dabbled in them much so don't know if they are the best solution.
If you need any more info, please ask. Thanks in advance for the help.
Here's a regex pattern that would return matches in groups named 'term':
("(?<term>[^"]+)"\s*|(?<term>[^ ]+)\s*)+
So for the input:
"a line" of text
The output items identified by the 'term' group would be:
a line
of
text
Regular expressions would definitely be the way to go...
You should check this MSDN link out for some info on the Regex class:
http://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex.aspx
and here is an excellent link to learn some regular expression syntax:
http://www.radsoftware.com.au/articles/regexlearnsyntax.aspx
Then to add some code examples, you could be doing it something along these lines:
string searchString = "a line of";
// The phrase is literal text, not a pattern: escape it so characters such as
// '+', '(' or '.' in a search phrase are matched literally instead of being
// interpreted by (or breaking) the regex parser.
Match m = Regex.Match(textToSearch, Regex.Escape(searchString));
or if you just want to find out if the string contains a match or not:
// Regex.IsMatch answers the yes/no question directly; the phrase is escaped
// because it is literal text rather than a pattern.
bool success = Regex.IsMatch(textToSearch, Regex.Escape(searchString));
use the regular expression builder here
http://gskinner.com/RegExr/
and you will be able to manipulate the regular expression to how you need it displayed
Use Regexs....
// Double quotes inside a regular string literal must be escaped as \".
string textToSearchIn = "\"a line of\" text";
// Lazily matches the text that has a quote directly before it and a quote directly after it.
string result = Regex.Match(textToSearchIn, "(?<=\").*?(?=\")").Value;
or if more then one, put this into a match collection...
// Quotes inside the pattern literal are escaped as \".
// Note: the lookarounds do not consume the quotes, so text lying between two
// quoted phrases is also reported as a match.
MatchCollection allPhrases = Regex.Matches(textToSearchIn, "(?<=\").*?(?=\")");
The Knuth-Morris-Pratt (KMP) algorithm is recognised as one of the fastest algorithms for finding substrings in strings (well, technically not strings but byte arrays).
using System.Collections.Generic;
namespace KMPSearch
{
    /// <summary>
    /// Knuth-Morris-Pratt search for occurrences of <see cref="Needle"/> inside
    /// <see cref="Haystack"/>.
    /// </summary>
    public class KMPSearch
    {
        /// <summary>Value returned by the Match methods when there is no occurrence.</summary>
        public static int NORESULT = -1;

        private string _needle;
        private string _haystack;
        private int[] _jumpTable;
        // Needle instance the current table was built for; null when the table is stale.
        private string _jumpTableNeedle;

        /// <summary>Creates a search of <paramref name="needle"/> within <paramref name="haystack"/>.</summary>
        public KMPSearch(string haystack, string needle)
        {
            Haystack = haystack;
            Needle = needle;
        }

        /// <summary>
        /// Builds the failure table for the current needle: entry i holds the length
        /// of the longest proper prefix of the needle that is also a suffix of
        /// needle[0..i]. A null or empty needle produces an empty table.
        /// </summary>
        public void ComputeJumpTable()
        {
            int needleLength = _needle == null ? 0 : _needle.Length;
            int[] table = new int[needleLength];
            int prefixLength = 0;
            for (int i = 1; i < needleLength; i++)
            {
                // Fall back through shorter borders until the next character extends one.
                while (prefixLength > 0 && _needle[i] != _needle[prefixLength])
                {
                    prefixLength = table[prefixLength - 1];
                }
                if (_needle[i] == _needle[prefixLength])
                {
                    prefixLength++;
                }
                table[i] = prefixLength;
            }
            _jumpTable = table;
            _jumpTableNeedle = _needle;
        }

        /// <summary>
        /// Returns the start indices of all non-overlapping occurrences of the
        /// needle, in ascending order.
        /// </summary>
        public int[] MatchAll()
        {
            List<int> matches = new List<int>();
            if (string.IsNullOrEmpty(Needle))
            {
                return matches.ToArray();
            }

            int needleLength = Needle.Length;
            int m = Match(0);
            while (m != NORESULT)
            {
                matches.Add(m);
                // Resume after the occurrence just found (occurrences do not overlap).
                m = Match(m + needleLength);
            }
            return matches.ToArray();
        }

        /// <summary>Returns the index of the first occurrence, or <see cref="NORESULT"/>.</summary>
        public int Match()
        {
            return Match(0);
        }

        /// <summary>
        /// Returns the index of the first occurrence that starts at or after
        /// <paramref name="offset"/>, or <see cref="NORESULT"/> when there is none,
        /// when the offset lies outside the haystack, or when the haystack or
        /// needle is null or empty.
        /// </summary>
        public int Match(int offset)
        {
            if (string.IsNullOrEmpty(Haystack) || string.IsNullOrEmpty(Needle))
            {
                return NORESULT;
            }

            int haystackLength = Haystack.Length;
            int needleLength = Needle.Length;
            if (offset < 0 || offset >= haystackLength || needleLength > haystackLength - offset)
            {
                return NORESULT;
            }

            // Build the table once per needle instead of on every call.
            if (_jumpTable == null || !object.ReferenceEquals(_jumpTableNeedle, _needle))
            {
                ComputeJumpTable();
            }

            int matched = 0; // number of needle characters currently matched
            for (int i = offset; i < haystackLength; i++)
            {
                // On a mismatch reuse the longest border of what was matched so far;
                // the haystack index never moves backwards.
                while (matched > 0 && Haystack[i] != Needle[matched])
                {
                    matched = _jumpTable[matched - 1];
                }
                if (Haystack[i] == Needle[matched])
                {
                    matched++;
                }
                if (matched == needleLength)
                {
                    return i - needleLength + 1;
                }
            }
            return NORESULT;
        }

        /// <summary>Text to look for. Assigning it invalidates the jump table.</summary>
        public string Needle
        {
            get { return _needle; }
            set
            {
                _needle = value;
                _jumpTableNeedle = null;
            }
        }

        /// <summary>Text to search in.</summary>
        public string Haystack
        {
            get { return _haystack; }
            set { _haystack = value; }
        }

        /// <summary>
        /// Failure table produced by <see cref="ComputeJumpTable"/>. A table assigned
        /// from outside is rebuilt by the next Match call.
        /// </summary>
        public int[] JumpTable
        {
            get { return _jumpTable; }
            set
            {
                _jumpTable = value;
                _jumpTableNeedle = null;
            }
        }
    }
}
Usage :-
using System;
using System.Collections.Generic;
using System.Text;
using System.Reflection;
namespace KMPSearch
{
    /// <summary>
    /// Command-line driver: prints the 1-based position of every occurrence of
    /// the needle (second argument) in the haystack (first argument).
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                Console.WriteLine("Usage: " + Environment.GetCommandLineArgs()[0] + " haystack needle");
            }
            else
            {
                KMPSearch search = new KMPSearch(args[0], args[1]);
                int[] matches = search.MatchAll();
                foreach (int i in matches)
                {
                    // Parentheses are required: without them '+' concatenates left to
                    // right and prints e.g. "01" instead of "1".
                    Console.WriteLine("Match found at position " + (i + 1));
                }
            }
        }
    }
}
Try this, It'll return an array for text. ex: { "a line of" text "notepad" }:
// Collect every stretch of text that has a double quote directly before and after it.
const string betweenQuotesPattern = "(?<=\").*?(?=\")";
string textToSearch = "\"a line of\" text \" notepad\"";
Match[] RegArray = Regex.Matches(textToSearch, betweenQuotesPattern)
    .Cast<Match>()
    .ToArray();
output: {"a line of"," text "," notepad"}