My function parses text: it finds a Vector3, modifies it, and if another Vector3 exists it calls itself again.
It works fine for small text files, but with big text files it always ends in a StackOverflowException. The parser searches the text and exits when nothing is found, and the text gets smaller on every pass, so it is not an infinite loop. The error appears in random places, usually here: int pos = text.IndexOf(search); (in a support module)
/// <summary>
/// Runs replacement passes until <c>IsVectorReplace</c> reports that nothing more
/// was replaced, then signals completion.
/// </summary>
/// <remarks>
/// The earlier version called itself once per pass, so a file with many vectors
/// needed one stack frame per vector and overflowed the stack. This loop keeps
/// the same call order as the recursive version: all passes first, then
/// <c>ParserWorkComplite</c> once, then <c>ProcessShow</c> once per pass.
/// </remarks>
private void ParseText()
{
    int completedPasses = 0;

    while (isWorking)
    {
        isWorking = IsVectorReplace();
        completedPasses++;
    }

    ParserWorkComplite();

    // The recursive version ran ProcessShow while unwinding, i.e. once per pass
    // and only after completion had been signalled.
    for (int pass = 0; pass < completedPasses; pass++)
    {
        ProcessShow();
    }
}
/// <summary>
/// Processes the first <c>prefix</c>…<c>suffix</c> vector in <c>fileContent</c>:
/// parses it, adds <c>shiftVector3</c>, stores the text up to and including the
/// rewritten vector in <c>partsOfFile</c>, and removes that text from
/// <c>fileContent</c>.
/// </summary>
/// <returns>
/// <c>true</c> when a vector was found and processed; <c>false</c> when no prefix,
/// or no suffix after the prefix, is left. In that case <c>fileContent</c> is
/// left untouched.
/// </returns>
/// <exception cref="ArgumentException">
/// Propagated from <c>ConvertFromString</c> when the text between the markers is
/// not three comma-separated numbers.
/// </exception>
private bool IsVectorReplace()
{
    int prefixIndex = fileContent.IndexOf(prefix, StringComparison.Ordinal);
    if (prefixIndex == -1) { return false; } // no vector left

    // Search for the suffix only after the prefix. A bounded IndexOf replaces the
    // char-by-char scan that ran past the end of the string when the suffix was
    // missing or longer than one character.
    int valueStart = prefixIndex + prefix.Length;
    int suffixIndex = fileContent.IndexOf(suffix, valueStart, StringComparison.Ordinal);
    if (suffixIndex == -1) { return false; } // unterminated vector

    string vectorTextContent = fileContent.Substring(valueStart, suffixIndex - valueStart);

    parseVector3 = ConvertFromString(vectorTextContent);
    parseVector3 += shiftVector3;
    string vectorNewTextContent = ConvertVect(parseVector3);

    // Save everything up to and including the rewritten vector. Previously the
    // replaced text was computed and then discarded, and the cut position was
    // the vector's length rather than its end index.
    int processedEnd = suffixIndex + suffix.Length;
    partsOfFile.Add(fileContent.Substring(0, valueStart) + vectorNewTextContent + suffix);

    // Drop the processed part so the next pass starts after this vector.
    fileContent = fileContent.Substring(processedEnd);
    return true;
}
Some supp modules:
//find vec in string
/// <summary>
/// Parses three comma-separated numbers (invariant culture) into a <c>Vector3</c>.
/// </summary>
/// <param name="input">Text such as <c>"1, 2.5, -3"</c>; each component is trimmed before parsing.</param>
/// <returns>The vector built from the three parsed components.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="input"/> is null, does not have exactly three components,
/// or a component is not a valid number.
/// </exception>
Vector3 ConvertFromString(string input)
{
    if (input == null)
    {
        throw new ArgumentException();
    }

    string[] components = input.Split(',');
    if (components.Length != 3)
    {
        throw new ArgumentException();
    }

    NumberStyles style = System.Globalization.NumberStyles.Any;
    CultureInfo culture = CultureInfo.InvariantCulture;

    float x, y, z;
    bool allParsed = float.TryParse(components[0].Trim(), style, culture, out x)
        && float.TryParse(components[1].Trim(), style, culture, out y)
        && float.TryParse(components[2].Trim(), style, culture, out z);

    if (!allParsed)
    {
        throw new ArgumentException();
    }

    return new Vector3(x, y, z);
}
//convert vect to text back
/// <summary>
/// Formats a vector as <c>"X,Y,Z"</c> using invariant-culture number formatting.
/// </summary>
/// <param name="v">Vector to format.</param>
/// <returns>The three components separated by commas, each with '.' as the decimal separator.</returns>
/// <remarks>
/// Formatting with the current culture and then replacing ',' with '.' only
/// works for cultures whose decimal separator is ',' or '.'. Invariant
/// formatting gives output that <c>ConvertFromString</c> can read back on any machine.
/// </remarks>
private string ConvertVect(Vector3 v)
{
    CultureInfo invariant = CultureInfo.InvariantCulture;
    return v.X.ToString(invariant) + "," + v.Y.ToString(invariant) + "," + v.Z.ToString(invariant);
}
/// <summary>
/// Replaces the first occurrence of <paramref name="search"/> in <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="search">Exact substring to look for (ordinal comparison).</param>
/// <param name="replace">Text inserted in place of the first occurrence.</param>
/// <returns><paramref name="text"/> unchanged when <paramref name="search"/> does not occur; otherwise the text with the first occurrence replaced.</returns>
public string ReplaceFirst(string text, string search, string replace)
{
    // Ordinal search: the result is combined with search.Length below, which is
    // only valid when the match covers exactly the characters of 'search'.
    // A culture-sensitive IndexOf does not guarantee that.
    int pos = text.IndexOf(search, StringComparison.Ordinal);
    if (pos < 0)
    {
        return text;
    }

    return text.Substring(0, pos) + replace + text.Substring(pos + search.Length);
}
This isn't exactly what you asked for, but based on the code you have provided, I would propose an entirely different approach using Regex
Here is some code to use as a starting point
You'll need this using statement in your .cs file
using System.Text.RegularExpressions;
//fileContent is assumed to be your original string containing Vector3 instances.
//prefix and suffix are assumed to be strings that mark the beginning and end of the Vector3 string respectively
// Escape the delimiters so any regex metacharacters in them are matched literally.
string escapedPrefix = Regex.Escape(prefix);
string escapedSuffix = Regex.Escape(suffix);
// Lookbehind/lookahead keep the delimiters out of the match, so only the text
// between them is handed to ModifyVector and replaced.
string vectorRegex = "(?<=" + escapedPrefix + ").+?(?=" + escapedSuffix + ")";
string replacedContent = Regex.Replace(fileContent, vectorRegex, ModifyVector);
/// <summary>
/// Match evaluator: parses the matched text as a vector, adds <c>shiftVector3</c>
/// and returns the shifted vector formatted back to text.
/// </summary>
/// <param name="vectorMatch">Regex match whose value is the text between the delimiters.</param>
/// <returns>Replacement text for the match.</returns>
private static string ModifyVector(Match vectorMatch)
{
    Vector3 shifted = ConvertFromString(vectorMatch.Value);
    shifted += shiftVector3;
    return ConvertVect(shifted);
}
The code above will do the following
Find all instances of text between prefix and suffix delimiters in fileContent
Invoke the ModifyVector function for each match
Replace the matched value with the output of the ModifyVector function
Related
I need to be able to extract a string between two tags, for example: "0002" from "morenonxmldata<tag1>0002</tag1>morenonxmldata"
I am using C# and .NET 3.5.
// Greedy capture: group 1 is everything between the first <tag1> and the last </tag1>.
const string sampleInput = "morenonxmldata<tag1>0002</tag1>morenonxmldata";
Regex regex = new Regex("<tag1>(.*)</tag1>");
Match v = regex.Match(sampleInput);
string s = v.Groups[1].Value;
Or (as mentioned in the comments) to match the minimal subset:
// Lazy quantifier (.*?): the capture stops at the first </tag1> after <tag1>.
Regex regex = new Regex("<tag1>(.*?)</tag1>");
Regex class is in System.Text.RegularExpressions namespace.
Solution without need of regular expression:
/// <summary>
/// Returns the text between the first <c>&lt;tag&gt;</c> and the following <c>&lt;/tag&gt;</c>.
/// </summary>
/// <param name="s">Text to search.</param>
/// <param name="tag">Tag name without angle brackets.</param>
/// <returns>
/// The enclosed text, or an empty string when <paramref name="s"/> is null or empty
/// or either tag is missing.
/// </returns>
string ExtractString(string s, string tag)
{
    if (string.IsNullOrEmpty(s))
    {
        return string.Empty;
    }

    string startTag = "<" + tag + ">";
    string endTag = "</" + tag + ">";

    // IndexOf returns -1 when the tag is absent. Using that value unchecked
    // produced a wrong start offset or a negative Substring length.
    int tagIndex = s.IndexOf(startTag, StringComparison.Ordinal);
    if (tagIndex < 0)
    {
        return string.Empty;
    }

    int startIndex = tagIndex + startTag.Length;
    int endIndex = s.IndexOf(endTag, startIndex, StringComparison.Ordinal);
    if (endIndex < 0)
    {
        return string.Empty;
    }

    return s.Substring(startIndex, endIndex - startIndex);
}
A Regex approach using lazy match and back-reference:
// Prints "name=content" for every <name>content</name> pair in the sample text.
// The pattern must be a verbatim string (@"..."): in a regular literal "\1"
// would not reach the regex engine as a backreference. The backreference
// requires the closing tag name to equal the captured opening tag name.
foreach (Match match in Regex.Matches(
    "morenonxmldata<tag1>0002</tag1>morenonxmldata<tag2>abc</tag2>asd",
    @"<([^>]+)>(.*?)</\1>"))
{
    Console.WriteLine("{0}={1}",
        match.Groups[1].Value,
        match.Groups[2].Value);
}
Extracting contents between two known values can be useful for later as well. So why not create an extension method for it. Here is what i do, Short and simple...
/// <summary>
/// Returns the text between the first <paramref name="startString"/> and the next
/// <paramref name="endString"/> that follows it.
/// </summary>
/// <param name="content">Text to search.</param>
/// <param name="startString">Marker that precedes the wanted text.</param>
/// <param name="endString">Marker that follows the wanted text.</param>
/// <returns>The enclosed text, or an empty string when an argument is null or either marker is not found in that order.</returns>
public static string GetBetween(this string content, string startString, string endString)
{
    if (content == null || startString == null || endString == null)
    {
        return string.Empty;
    }

    int startMarker = content.IndexOf(startString, StringComparison.Ordinal);
    if (startMarker < 0)
    {
        return string.Empty;
    }

    int start = startMarker + startString.Length;

    // The end marker must be found AFTER the start marker. Checking only that
    // it exists somewhere in the text let a -1 reach Substring.
    int end = content.IndexOf(endString, start, StringComparison.Ordinal);
    if (end < 0)
    {
        return string.Empty;
    }

    return content.Substring(start, end - start);
}
string input = "Exemple of value between two string FirstString text I want to keep SecondString end of my string";
// Verbatim string (@"...") for the pattern; group 1 lazily captures the text
// between "FirstString " and " SecondString ".
var match = Regex.Match(input, @"FirstString (.+?) SecondString ").Groups[1].Value;
To get Single/Multiple values without regular expression
// String.Split has no (string, string) overload, so both separators are passed
// as an array together with explicit split options.
string[] tagSeparators = { "<tag1>", "</tag1>" };
// For Single
var value = inputString.Split(tagSeparators, StringSplitOptions.None)[1];
// For Multiple: keeps the odd-indexed pieces, which are the tag contents when
// opening and closing tags alternate (assumes well-formed input).
var values = inputString.Split(tagSeparators, StringSplitOptions.None).Where((_, index) => index % 2 != 0);
For future reference, I found this code snippet at http://www.mycsharpcorner.com/Post.aspx?postID=15 If you need to search for different "tags" it works very well.
/// <summary>
/// Extracts the first segment delimited by <paramref name="strBegin"/> and
/// <paramref name="strEnd"/> and also returns the text that follows it.
/// </summary>
/// <param name="strBegin">Marker that opens the segment.</param>
/// <param name="strEnd">Marker that closes the segment; searched for after <paramref name="strBegin"/>.</param>
/// <param name="strSource">Text to search.</param>
/// <param name="includeBegin">When true the returned segment starts with <paramref name="strBegin"/>.</param>
/// <param name="includeEnd">When true the returned segment ends with <paramref name="strEnd"/>.</param>
/// <returns>
/// A two-element array: [0] is the segment (empty when not found), [1] is the
/// text after the closing marker. When the opening marker is missing, [1] is the
/// whole source; when only the closing marker is missing both elements are empty.
/// </returns>
public static string[] GetStringInBetween(string strBegin,
    string strEnd, string strSource,
    bool includeBegin, bool includeEnd)
{
    string[] result = { "", "" };

    int beginIndex = strSource.IndexOf(strBegin, StringComparison.Ordinal);
    if (beginIndex == -1)
    {
        // stay where we are
        result[1] = strSource;
        return result;
    }

    int contentStart = beginIndex + strBegin.Length;
    int endIndex = strSource.IndexOf(strEnd, contentStart, StringComparison.Ordinal);
    if (endIndex == -1)
    {
        return result;
    }

    int afterEnd = endIndex + strEnd.Length;
    int segmentStart = includeBegin ? beginIndex : contentStart;
    int segmentEnd = includeEnd ? afterEnd : endIndex;
    result[0] = strSource.Substring(segmentStart, segmentEnd - segmentStart);

    // Advance beyond this segment. The remainder always starts right after the
    // closing marker. Previously, with includeEnd set, the marker length was
    // added twice and the start of the remainder was lost.
    result[1] = strSource.Substring(afterEnd);
    return result;
}
I strip before and after data.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
namespace testApp
{
    /// <summary>
    /// Console demo: isolates the text inside tag1 by deleting everything before
    /// the opening tag and everything after the closing tag.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // [\s\S]* matches any character, including line breaks.
            const string leadingPattern = "[\\s\\S]*<tag1>";
            const string trailingPattern = "</tag1>[\\s\\S]*";

            // removes all leading data
            string withoutLeading = Regex.Replace("morenonxmldata<tag1>0002</tag1>morenonxmldata", leadingPattern, "");
            // removes all trailing data
            string tagContent = Regex.Replace(withoutLeading, trailingPattern, "");

            Console.WriteLine(tagContent);
            Console.ReadLine();
        }
    }
}
Without RegEx, with some must-have value checking
/// <summary>
/// Returns the text enclosed by <c>&lt;tag&gt;</c> and <c>&lt;/tag&gt;</c>, falling back
/// to the start or end of the message when a tag is missing.
/// </summary>
/// <param name="soapMessage">Message to search; returned as-is when null or empty.</param>
/// <param name="tag">Tag name without angle brackets.</param>
/// <returns>
/// The text from just after the opening tag (or from the start when it is absent)
/// up to the closing tag (or to the end when it is absent).
/// </returns>
public static string ExtractString(string soapMessage, string tag)
{
    if (string.IsNullOrEmpty(soapMessage))
    {
        return soapMessage;
    }

    string openTag = "<" + tag + ">";
    string closeTag = "</" + tag + ">";

    int openAt = soapMessage.IndexOf(openTag);
    int from = openAt == -1 ? 0 : openAt + openTag.Length;

    int closeAt = soapMessage.IndexOf(closeTag, from);
    int to = closeAt == -1 ? soapMessage.Length : closeAt;

    return soapMessage.Substring(from, to - from);
}
/// <summary>
/// Returns the text between the first <paramref name="delimiterFirst"/> and the
/// next <paramref name="delimiterLast"/> after it.
/// </summary>
/// <param name="line">Text to search.</param>
/// <param name="delimiterFirst">Marker that precedes the wanted text.</param>
/// <param name="delimiterLast">Marker that follows the wanted text.</param>
/// <returns>
/// The enclosed text; everything after <paramref name="delimiterFirst"/> when
/// <paramref name="delimiterLast"/> does not follow; an empty string when
/// <paramref name="line"/> is null or empty or <paramref name="delimiterFirst"/> is absent.
/// </returns>
public string between2finer(string line, string delimiterFirst, string delimiterLast)
{
    if (string.IsNullOrEmpty(line))
    {
        return string.Empty;
    }

    // Index arithmetic instead of Split(...)[1]: the split version threw
    // IndexOutOfRangeException when the first delimiter was missing or sat at
    // the very start of the line (empty entries were removed).
    int firstAt = line.IndexOf(delimiterFirst, StringComparison.Ordinal);
    if (firstAt < 0)
    {
        return string.Empty;
    }

    int contentStart = firstAt + delimiterFirst.Length;
    int lastAt = line.IndexOf(delimiterLast, contentStart, StringComparison.Ordinal);

    return lastAt < 0
        ? line.Substring(contentStart)
        : line.Substring(contentStart, lastAt - contentStart);
}
/// <summary>
/// Click handler: extracts the value that follows "X-Failed-Recipients: " in a
/// sample multi-line header and shows it in a message box.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    const string headerName = "X-Failed-Recipients: ";
    string manyLines = "Received: from exim by isp2.ihc.ru with local (Exim 4.77) \nX-Failed-Recipients: rmnokixm#gmail.com\nFrom: Mail Delivery System <Mailer-Daemon#isp2.ihc.ru>";

    // The value runs from the header name to the end of that line.
    string failedRecipient = between2finer(manyLines, headerName, "\n");
    MessageBox.Show(failedRecipient);
}
I'm working on French String in .NET
Decoding a Mail body , I receive "Chasn=C3=A9 sur illet"
I would like to get "Chasné sur illet"
and I haven't found any solution after 2 days of web searching.
C# ou VB.NET
Can anyone help me?
thanks
Or the easiest of all, just use the QuotedPrintableDecoder from my MimeKit library:
/// <summary>
/// Decodes quoted-printable text with <c>QuotedPrintableDecoder</c> and interprets
/// the decoded bytes using the named character set.
/// </summary>
/// <param name="input">Quoted-printable text; converted to bytes as ASCII before decoding.</param>
/// <param name="charset">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
/// <returns>The decoded string.</returns>
static string DecodeQuotedPrintable (string input, string charset)
{
    byte[] encoded = Encoding.ASCII.GetBytes (input);

    var decoder = new QuotedPrintableDecoder ();
    // Output buffer sized from the decoder's own estimate; only the first
    // 'decodedLength' bytes are converted to text.
    byte[] decoded = new byte[decoder.EstimateOutputLength (encoded.Length)];
    int decodedLength = decoder.Decode (encoded, 0, encoded.Length, decoded);

    return Encoding.GetEncoding (charset).GetString (decoded, 0, decodedLength);
}
Note that the other answers above assume the decoded content will be ASCII or UTF-8, but that isn't necessarily the case. You'll need to get the charset parameter from the Content-Type header of the MIME part that you are decoding.
Of course... if you don't know how to get that info, you could simply use my awesome MailKit library to get the MIME part from IMAP and have it do all of this work for you.
This is UTF8 encoding.
Using this post:
http://www.dpit.co.uk/decoding-quoted-printable-email-in-c/
Here is the code (don't forget to accept the answer if helped):
using System;
using System.Text;
using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo that decodes a quoted-printable string whose escaped bytes are UTF-8.
    /// </summary>
    class Program
    {
        // One or more consecutive "=XX" escapes. Consecutive escapes are decoded
        // together so multi-byte UTF-8 sequences stay intact. XX must be hex
        // digits, so a stray "=" followed by other characters is left as-is.
        private static readonly Regex EncodedRun = new Regex(@"(=[0-9A-Fa-f]{2})+");

        static void Main(string[] args)
        {
            Console.WriteLine(DecodeQuotedPrintable("Chasn=C3=A9 sur illet"));
            Console.ReadKey();
        }

        /// <summary>
        /// Decodes "=XX" escapes as UTF-8 bytes and removes soft line breaks.
        /// </summary>
        /// <param name="input">Quoted-printable text.</param>
        /// <returns>The decoded text.</returns>
        static string DecodeQuotedPrintable(string input)
        {
            // Remove soft line breaks ("=" followed by CRLF) first, so an escape
            // run that was wrapped across lines is decoded as one byte sequence.
            string unfolded = input.Replace("=\r\n", "");

            // Each run is replaced where it was matched. A global string replace
            // of the matched text could also rewrite text produced by an earlier
            // decode.
            return EncodedRun.Replace(unfolded, m =>
            {
                byte[] bytes = new byte[m.Value.Length / 3];
                for (int i = 0; i < bytes.Length; i++)
                {
                    string hex = m.Value.Substring(i * 3 + 1, 2);
                    bytes[i] = Convert.ToByte(hex, 16);
                }
                return Encoding.UTF8.GetString(bytes);
            });
        }
    }
}
From : https://stackoverflow.com/a/36803911/6403521
My solution :
/// <summary>
/// Checks that UTF-8 quoted-printable escapes are decoded to the accented characters.
/// </summary>
[TestMethod]
public void TestMethod1()
{
    const string charset = "utf-8";

    string bouichere = quotedprintable("La Bouich=C3=A8re", charset);
    Assert.AreEqual("La Bouichère", bouichere);

    string chasne = quotedprintable("Chasn=C3=A9 sur illet", charset);
    Assert.AreEqual("Chasné sur illet", chasne);

    string accents = quotedprintable("=C3=A9 =C3=A8", charset);
    Assert.AreEqual("é è", accents);
}
/// <summary>
/// Decodes quoted-printable text: removes soft line breaks and converts runs of
/// "=XX" escapes to characters using the named encoding.
/// </summary>
/// <param name="pStrIn">Quoted-printable text; returned as-is when null or empty.</param>
/// <param name="encoding">Encoding name passed to <c>Encoding.GetEncoding</c>, e.g. "utf-8".</param>
/// <returns>
/// The decoded text. An "=" that is not followed by two hex digits is kept
/// literally instead of raising an exception.
/// </returns>
private string quotedprintable(string pStrIn, string encoding)
{
    if (string.IsNullOrEmpty(pStrIn))
    {
        return pStrIn;
    }

    System.Text.Encoding charset = System.Text.Encoding.GetEncoding(encoding);

    // Soft line breaks: "=" directly before a line ending.
    string text = pStrIn.Replace("=\r\n", "").Replace("=\n", "");

    System.Text.StringBuilder output = new System.Text.StringBuilder(text.Length);
    // Bytes of the current run of consecutive escapes. They are decoded together
    // because one character may span several bytes.
    System.Collections.Generic.List<byte> pending = new System.Collections.Generic.List<byte>();

    int i = 0;
    while (i < text.Length)
    {
        byte value;
        if (text[i] == '=' && TryReadHexByte(text, i + 1, out value))
        {
            pending.Add(value);
            i += 3;
            continue;
        }

        if (pending.Count > 0)
        {
            output.Append(charset.GetString(pending.ToArray()));
            pending.Clear();
        }

        output.Append(text[i]);
        i++;
    }

    if (pending.Count > 0)
    {
        output.Append(charset.GetString(pending.ToArray()));
    }

    return output.ToString();
}

// Reads two hex digits at 'index'; returns false when fewer than two characters
// remain or either character is not a hex digit.
private static bool TryReadHexByte(string text, int index, out byte value)
{
    value = 0;
    if (index + 2 > text.Length
        || !System.Uri.IsHexDigit(text[index])
        || !System.Uri.IsHexDigit(text[index + 1]))
    {
        return false;
    }

    value = System.Convert.ToByte(text.Substring(index, 2), 16);
    return true;
}
We had an issue with this method - that it is VERY slow.
The following enhanced performance A LOT
/// <summary>
/// Decodes a message body according to its Content-Transfer-Encoding. Only
/// "quoted-printable" is handled; any other value returns the text unchanged.
/// </summary>
/// <param name="messageText">Encoded body text.</param>
/// <param name="enc">Encoding used to turn the escaped bytes into characters.</param>
/// <param name="transferEncoding">Transfer-encoding name, compared case-insensitively.</param>
/// <returns>The decoded text, or <paramref name="messageText"/> unchanged when nothing applies.</returns>
public static string FromMailTransferEncoding(this string messageText, Encoding enc, string transferEncoding)
{
    if (string.IsNullOrEmpty(transferEncoding) || string.IsNullOrEmpty(messageText))
    {
        return messageText;
    }

    if (!"quoted-printable".Equals(transferEncoding, StringComparison.OrdinalIgnoreCase))
    {
        return messageText;
    }

    // Remove soft line breaks ("=" + CRLF) before decoding, so an escape run
    // wrapped across lines is decoded as one byte sequence.
    string unfolded = Regex.Replace(messageText, @"=\r\n", "");

    // A match evaluator replaces each run exactly where it was found. Replacing
    // "the first match" in already-decoded text could hit text produced by an
    // earlier decode (e.g. "=3D41" turning into "=41"), and it treated the
    // decoded text as a replacement pattern.
    return Regex.Replace(unfolded, @"(=[0-9A-Fa-f]{2})+", m =>
    {
        byte[] bytes = new byte[m.Value.Length / 3];
        for (int i = 0; i < bytes.Length; i++)
        {
            bytes[i] = Convert.ToByte(m.Value.Substring(i * 3 + 1, 2), 16);
        }
        return enc.GetString(bytes);
    });
}
/// <summary>
/// Replaces each "=XX" escape (XX = two hex digits) with the character whose
/// code is that hex value.
/// </summary>
/// <param name="source">Text containing "=XX" escapes.</param>
/// <returns>
/// The text with escapes replaced. Each escape becomes one char with that numeric
/// value; bytes are not combined through a character encoding. An "=" that is not
/// followed by two hex digits is dropped and the following text is kept (this
/// matches the previous behaviour for non-hex followers).
/// </returns>
static string ConverFromHex(string source)
{
    System.Text.StringBuilder target = new System.Text.StringBuilder(source.Length);
    int segmentStart = 0;
    int markerPos = source.IndexOf('=', 0);

    while (markerPos >= 0)
    {
        // copy the text that precedes the '='
        target.Append(source, segmentStart, markerPos - segmentStart);

        int hexStart = markerPos + 1;
        segmentStart = hexStart;

        // Only read the two hex digits when they exist. An '=' within two
        // characters of the end used to make Substring throw.
        int hexNum;
        if (hexStart + 2 <= source.Length
            && int.TryParse(source.Substring(hexStart, 2),
                System.Globalization.NumberStyles.AllowHexSpecifier,
                System.Globalization.CultureInfo.InvariantCulture,
                out hexNum))
        {
            target.Append((char)hexNum);
            segmentStart += 2;
        }

        // next occurrence
        markerPos = source.IndexOf('=', hexStart);
    }

    // add rest of source
    target.Append(source, segmentStart, source.Length - segmentStart);
    return target.ToString();
}
I am using the following C# code to modify a lowercase letter to uppercase after a single quote:
/// <summary>
/// First name. On assignment, the character that follows the first apostrophe
/// is upper-cased (e.g. "Mc'donald" becomes "Mc'Donald").
/// </summary>
public virtual string FirstName
{
    get { return _firstName; }
    set
    {
        if (value != null)
        {
            int pos = value.IndexOf('\'');

            // Only when a character actually follows the apostrophe.
            if (pos >= 0 && pos != value.Length - 1)
            {
                string upper = value[pos + 1].ToString().ToUpper();
                value = value.Substring(0, pos + 1) + upper + value.Substring(pos + 2);
            }
        }

        // The adjusted value was previously computed but never stored, so the
        // getter kept returning the old backing-field content.
        _firstName = value;
    }
}
To me this looks like overkill. Is there an easier way to achieve the desired result:
Value: Mc'donald
Expected: Mc'Donald
here is without regex
// Upper-case the character that follows the first apostrophe, if there is one.
int pos = data.IndexOf("'");
bool hasCharAfterQuote = pos >= 0 && pos < data.Length - 1;
if (hasCharAfterQuote)
{
    char[] chars = data.ToCharArray();
    chars[pos + 1] = char.ToUpper(chars[pos + 1]);
    data = new string(chars);
}
Since you're open to Regex, would this overload of the Regex.Replace do what you need?
Regex.Replace Method (String, MatchEvaluator)
Here's a modified version of the example given at the link above. I've changed it to use the '\w pattern and to return the match in upper case.
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Demo: upper-cases the word character that follows an apostrophe using
/// Regex.Replace with a MatchEvaluator.
/// </summary>
class RegExSample
{
    /// <summary>Returns the matched text in upper case (invariant culture).</summary>
    static string CapText(Match m)
    {
        // Return the match in upper case
        return m.ToString().ToUpperInvariant();
    }

    static void Main()
    {
        string text = "Mc'donald";
        System.Console.WriteLine("text=[" + text + "]");

        // Verbatim string (@"...") so that \w reaches the regex engine unchanged.
        // The pattern matches an apostrophe followed by one word character.
        Regex rx = new Regex(@"'\w");
        string result = rx.Replace(text, new MatchEvaluator(RegExSample.CapText));
        System.Console.WriteLine("result=[" + result + "]");
    }
}
Perhaps regular expressions?
string value = "Mc'donald";
// An apostrophe followed by one word character; Value is "" when nothing matches.
string found = Regex.Match(value, "'[\\w]").Value;
// string.Replace throws ArgumentException for an empty search string, so the
// input is returned unchanged when there was no match.
string result = found.Length == 0 ? value : value.Replace(found, found.ToUpper());
Console.WriteLine(result); // Mc'Donald
I have a txt file as a string, and I need to find words between two characters and Ltrim/Rtrim everything else. It may have to be conditional because the two characters may change depending on the string.
Example:
car= (data between here I want) ;
car = (data between here I want) </value>
Code:
// Find the last occurrence of "car=" in st, ignoring case (ordinal comparison).
int pos = st.LastIndexOf("car=", StringComparison.OrdinalIgnoreCase);
if (pos >= 0)
{
// Takes the text BEFORE the marker; the rest of the statement is elided in the original post.
server = st.Substring(0, pos);..............
}
This is a simple extension method I use:
/// <summary>
/// Returns the text between the first <paramref name="findfrom"/> and the next
/// <paramref name="findto"/> after it.
/// </summary>
/// <param name="src">Text to search.</param>
/// <param name="findfrom">Marker that precedes the wanted text.</param>
/// <param name="findto">Marker that follows the wanted text.</param>
/// <returns>The enclosed text, or an empty string when either marker is not found.</returns>
public static string Between(this string src, string findfrom, string findto)
{
    int start = src.IndexOf(findfrom, StringComparison.Ordinal);

    // Check before searching for the end marker. With start == -1 the second
    // IndexOf was called with an invalid start index and could throw
    // ArgumentOutOfRangeException.
    if (start < 0)
    {
        return "";
    }

    int contentStart = start + findfrom.Length;
    int to = src.IndexOf(findto, contentStart, StringComparison.Ordinal);
    if (to < 0)
    {
        return "";
    }

    return src.Substring(contentStart, to - contentStart);
}
With this you can use
// Text between "car=" and "</value>" (statement terminator added).
string valueToFind = sourceString.Between("car=", "</value>");
You can also try this:
/// <summary>
/// Returns the text between the first <paramref name="findfrom"/> and the first
/// end token from <paramref name="findto"/> that is found after it.
/// </summary>
/// <param name="src">Text to search.</param>
/// <param name="findfrom">Marker that precedes the wanted text.</param>
/// <param name="findto">
/// Candidate end tokens, tried in the given order. The first one that occurs
/// after <paramref name="findfrom"/> decides the result, even if a later token
/// occurs earlier in the text.
/// </param>
/// <returns>The enclosed text, or an empty string when the start marker or all end tokens are missing.</returns>
public static string Between(this string src, string findfrom,
    params string[] findto)
{
    int markerIndex = src.IndexOf(findfrom);
    if (markerIndex < 0)
    {
        return "";
    }

    int contentStart = markerIndex + findfrom.Length;

    for (int i = 0; i < findto.Length; i++)
    {
        int contentEnd = src.IndexOf(findto[i], contentStart);
        if (contentEnd < 0)
        {
            continue;
        }

        return src.Substring(contentStart, contentEnd - contentStart);
    }

    return "";
}
With this you can give multiple ending tokens (their order is important)
// Tries ";" first, then "</value>" as the end token (statement terminator added).
string valueToFind = sourceString.Between("car=", ";", "</value>");
You could use regex
var input = "car= (data between here I want) ;";
// Verbatim string (@"...") so that \s reaches the regex engine unchanged.
// Group 1 lazily captures the text between the delimiters, without the
// surrounding whitespace.
var pattern = @"car=\s*(.*?)\s*;"; // where car= is the first delimiter and ; is the second one
var result = Regex.Match(input, pattern).Groups[1].Value;
I have a three-word expression, "Shut The Door", and I want to find it in a sentence. Since the words are separated by spaces, what would be the best solution for it?
If you have the string:
// Sentence to search; note that the phrase appears in lower case.
string sample = "If you know what's good for you, you'll shut the door!";
And you want to find where it is in a sentence, you can use the IndexOf method.
int index = sample.IndexOf("shut the door");
// index will be 40 (zero-based position of the 's' in "shut")
A non -1 answer means the string has been located. -1 means it does not exist in the string. Please note that the search string ("shut the door") is case sensitive.
Use build in Regex.Match Method for matching strings.
string text = "One car red car blue car";
// Verbatim string (@"...") so that \w and \s reach the regex engine unchanged.
// Group 1 is the word before "car", group 2 is "car" itself.
string pat = @"(\w+)\s+(car)";
// Compile the regular expression.
Regex r = new Regex(pat, RegexOptions.IgnoreCase);
// Match the regular expression pattern against a text string.
Match m = r.Match(text);
int matchCount = 0;
while (m.Success)
{
    Console.WriteLine("Match"+ (++matchCount));
    // Groups 1 and 2 are the two capturing groups; group 0 (the whole match) is skipped.
    for (int i = 1; i <= 2; i++)
    {
        Group g = m.Groups[i];
        Console.WriteLine("Group"+i+"='" + g + "'");
        CaptureCollection cc = g.Captures;
        for (int j = 0; j < cc.Count; j++)
        {
            Capture c = cc[j];
            System.Console.WriteLine("Capture"+j+"='" + c + "', Position="+c.Index);
        }
    }
    m = m.NextMatch();
}
http://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex.match(v=vs.71).aspx
http://support.microsoft.com/kb/308252
// C# method names are PascalCase: String.IndexOf returns -1 when string2 is not found.
if (string1.IndexOf(string2) >= 0)
...
The spaces are nothing special; they are just characters, so you can find a string like this the same way you would find any other string in your sentence, for example using "IndexOf" if you need the position, or just "Contains" if you only need to know whether it exists.
E.g.
string sentence = "foo bar baz";
string phrase = "bar baz";
// Contains treats the spaces like any other character.
bool containsPhrase = sentence.Contains(phrase);
Console.WriteLine(containsPhrase); // True
Here is some C# code to find a substrings using a start string and end string point but you can use as a base and modify (i.e. remove need for end string) to just find your string...
2 versions, one to just find the first instance of a substring, other returns a dictionary of all starting positions of the substring and the actual string.
/// <summary>
/// Finds every segment delimited by <paramref name="start"/> and <paramref name="end"/>.
/// </summary>
/// <param name="start">Marker that opens a segment.</param>
/// <param name="end">Marker that closes a segment; searched for after the opening marker.</param>
/// <param name="source">Text to search.</param>
/// <param name="includeStartEnd">When true each segment includes both markers.</param>
/// <param name="caseInsensitive">
/// When true, source and markers are lower-cased before searching, so the
/// returned segments are lower-case as well.
/// </param>
/// <returns>
/// Segments keyed by the position of their first character in the source. Empty
/// when any argument is null or empty, or when nothing is found.
/// </returns>
public Dictionary<int, string> GetSubstringDic(string start, string end, string source, bool includeStartEnd, bool caseInsensitive)
{
    Dictionary<int, string> result = new Dictionary<int, string>();

    // Explicit guards replace the former catch-all, which built an error message
    // that was never reported and returned an empty result.
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(start) || string.IsNullOrEmpty(end))
    {
        return result;
    }

    //if just want to find string, case insensitive
    if (caseInsensitive)
    {
        source = source.ToLower();
        start = start.ToLower();
        end = end.ToLower();
    }

    int startIndex = source.IndexOf(start, StringComparison.Ordinal);
    while (startIndex != -1)
    {
        // Look for the end marker only after the complete start marker.
        // Searching from startIndex + 1 could find it inside the start marker
        // and produce a negative length.
        int contentStart = startIndex + start.Length;
        int endIndex = source.IndexOf(end, contentStart, StringComparison.Ordinal);
        if (endIndex == -1)
        {
            //no end so stop searching
            break;
        }

        if (includeStartEnd)
        {
            //make sure to include the end string
            int length = (endIndex + end.Length) - startIndex;
            result.Add(startIndex, source.Substring(startIndex, length));
        }
        else
        {
            result.Add(contentStart, source.Substring(contentStart, endIndex - contentStart));
        }

        //move start position up
        startIndex = source.IndexOf(start, endIndex + 1, StringComparison.Ordinal);
    }

    return result;
}
/// <summary>
/// Returns the first segment delimited by <paramref name="start"/> and <paramref name="end"/>.
/// </summary>
/// <param name="start">Marker that opens the segment.</param>
/// <param name="end">Marker that closes the segment; searched for after the opening marker.</param>
/// <param name="source">Text to search.</param>
/// <param name="includeStartEnd">When true the segment includes both markers.</param>
/// <param name="caseInsensitive">
/// When true, source and markers are lower-cased before searching, so the
/// returned segment is lower-case as well.
/// </param>
/// <returns>The segment, or an empty string when an argument is null or empty or no segment is found.</returns>
public string GetSubstring(string start, string end, string source, bool includeStartEnd, bool caseInsensitive)
{
    // Explicit guards replace the former catch-all, which built an error message
    // that was never reported and returned an empty string.
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(start) || string.IsNullOrEmpty(end))
    {
        return string.Empty;
    }

    if (caseInsensitive)
    {
        source = source.ToLower();
        start = start.ToLower();
        end = end.ToLower();
    }

    int startIndex = source.IndexOf(start, StringComparison.Ordinal);
    if (startIndex == -1)
    {
        return string.Empty;
    }

    // Look for the end marker only after the complete start marker. Searching
    // from startIndex + 1 could find it inside the start marker and produce a
    // negative length.
    int contentStart = startIndex + start.Length;
    int endIndex = source.IndexOf(end, contentStart, StringComparison.Ordinal);
    if (endIndex == -1)
    {
        return string.Empty;
    }

    return includeStartEnd
        ? source.Substring(startIndex, (endIndex + end.Length) - startIndex)
        : source.Substring(contentStart, endIndex - contentStart);
}
You may want to make sure the check ignores the case of both phrases.
string theSentence = "I really want you to shut the door.";
string thePhrase = "Shut The Door";
// One case-insensitive search answers both questions. Upper-casing both strings
// just to compare them allocates copies and depends on the current culture's
// casing rules.
int phraseStartsAt = theSentence.IndexOf(
    thePhrase,
    StringComparison.InvariantCultureIgnoreCase);
bool phraseIsPresent = phraseStartsAt >= 0;
Console.WriteLine("Is the phrase present? " + phraseIsPresent);
Console.WriteLine("The phrase starts at character: " + phraseStartsAt);
This outputs:
Is the phrase present? True
The phrase starts at character: 21