This question already has answers here:
How to split csv whose columns may contain comma
(9 answers)
Closed 4 years ago.
I have the following comma-separated string that I need to split. The problem is that some of the content is within quotes and contains commas that shouldn't be used in the split.
String:
111,222,"33,44,55",666,"77,88","99"
I want the output:
111
222
33,44,55
666
77,88
99
I have tried this:
(?:,?)((?<=")[^"]+(?=")|[^",]+)
But it reads the comma between "77,88","99" as a hit and I get the following output:
111
222
33,44,55
666
77,88
,
99
Depending on your needs you may not be able to use a csv parser, and may in fact want to re-invent the wheel!!
You can do so with some simple regex
(?:^|,)(\"(?:[^\"]+|\"\")*\"|[^,]*)
This will do the following:
(?:^|,) = Match expression "Beginning of line or string ,"
(\"(?:[^\"]+|\"\")*\"|[^,]*) = A numbered capture group, this will select between 2 alternatives:
stuff in quotes
stuff between commas
This should give you the output you are looking for.
Example code in C#
static Regex csvSplit = new Regex("(?:^|,)(\"(?:[^\"]+|\"\")*\"|[^,]*)", RegexOptions.Compiled);
public static string[] SplitCSV(string input)
{
List<string> list = new List<string>();
string curr = null;
foreach (Match match in csvSplit.Matches(input))
{
curr = match.Value;
if (0 == curr.Length)
{
list.Add("");
}
list.Add(curr.TrimStart(','));
}
return list.ToArray();
}
private void button1_Click(object sender, RoutedEventArgs e)
{
Console.WriteLine(SplitCSV("111,222,\"33,44,55\",666,\"77,88\",\"99\""));
}
Warning As per #MrE's comment - if a rogue new line character appears in a badly formed csv file and you end up with an uneven ("string) you'll get catastrophic backtracking (https://www.regular-expressions.info/catastrophic.html) in your regex and your system will likely crash (like our production system did). Can easily be replicated in Visual Studio and as I've discovered will crash it. A simple try/catch will not trap this issue either.
You should use:
(?:^|,)(\"(?:[^\"])*\"|[^,]*)
instead
Fast and easy:
public static string[] SplitCsv(string line)
{
List<string> result = new List<string>();
StringBuilder currentStr = new StringBuilder("");
bool inQuotes = false;
for (int i = 0; i < line.Length; i++) // For each character
{
if (line[i] == '\"') // Quotes are closing or opening
inQuotes = !inQuotes;
else if (line[i] == ',') // Comma
{
if (!inQuotes) // If not in quotes, end of current string, add it to result
{
result.Add(currentStr.ToString());
currentStr.Clear();
}
else
currentStr.Append(line[i]); // If in quotes, just add it
}
else // Add any other character to current string
currentStr.Append(line[i]);
}
result.Add(currentStr.ToString());
return result.ToArray(); // Return array of all strings
}
With this string as input :
111,222,"33,44,55",666,"77,88","99"
It will return :
111
222
33,44,55
666
77,88
99
i really like jimplode's answer, but I think a version with yield return is a little bit more useful, so here it is:
public IEnumerable<string> SplitCSV(string input)
{
Regex csvSplit = new Regex("(?:^|,)(\"(?:[^\"]+|\"\")*\"|[^,]*)", RegexOptions.Compiled);
foreach (Match match in csvSplit.Matches(input))
{
yield return match.Value.TrimStart(',');
}
}
Maybe it's even more useful to have it like an extension method:
public static class StringHelper
{
public static IEnumerable<string> SplitCSV(this string input)
{
Regex csvSplit = new Regex("(?:^|,)(\"(?:[^\"]+|\"\")*\"|[^,]*)", RegexOptions.Compiled);
foreach (Match match in csvSplit.Matches(input))
{
yield return match.Value.TrimStart(',');
}
}
}
This regular expression works without the need to loop through values and TrimStart(','), like in the accepted answer:
((?<=\")[^\"]*(?=\"(,|$)+)|(?<=,|^)[^,\"]*(?=,|$))
Here is the implementation in C#:
string values = "111,222,\"33,44,55\",666,\"77,88\",\"99\"";
MatchCollection matches = new Regex("((?<=\")[^\"]*(?=\"(,|$)+)|(?<=,|^)[^,\"]*(?=,|$))").Matches(values);
foreach (var match in matches)
{
Console.WriteLine(match);
}
Outputs
111
222
33,44,55
666
77,88
99
None of these answers work when the string has a comma inside quotes, as in "value, 1", or escaped double-quotes, as in "value ""1""", which are valid CSV that should be parsed as value, 1 and value "1", respectively.
This will also work with the tab-delimited format if you pass in a tab instead of a comma as your delimiter.
public static IEnumerable<string> SplitRow(string row, char delimiter = ',')
{
var currentString = new StringBuilder();
var inQuotes = false;
var quoteIsEscaped = false; //Store when a quote has been escaped.
row = string.Format("{0}{1}", row, delimiter); //We add new cells at the delimiter, so append one for the parser.
foreach (var character in row.Select((val, index) => new {val, index}))
{
if (character.val == delimiter) //We hit a delimiter character...
{
if (!inQuotes) //Are we inside quotes? If not, we've hit the end of a cell value.
{
Console.WriteLine(currentString);
yield return currentString.ToString();
currentString.Clear();
}
else
{
currentString.Append(character.val);
}
} else {
if (character.val != ' ')
{
if(character.val == '"') //If we've hit a quote character...
{
if(character.val == '\"' && inQuotes) //Does it appear to be a closing quote?
{
if (row[character.index + 1] == character.val) //If the character afterwards is also a quote, this is to escape that (not a closing quote).
{
quoteIsEscaped = true; //Flag that we are escaped for the next character. Don't add the escaping quote.
}
else if (quoteIsEscaped)
{
quoteIsEscaped = false; //This is an escaped quote. Add it and revert quoteIsEscaped to false.
currentString.Append(character.val);
}
else
{
inQuotes = false;
}
}
else
{
if (!inQuotes)
{
inQuotes = true;
}
else
{
currentString.Append(character.val); //...It's a quote inside a quote.
}
}
}
else
{
currentString.Append(character.val);
}
}
else
{
if (!string.IsNullOrWhiteSpace(currentString.ToString())) //Append only if not new cell
{
currentString.Append(character.val);
}
}
}
}
}
With minor updates to the function provided by "Chad Hedgcock".
Updates are on:
Line 26: character.val == '\"' - This can never be true due to the check made on Line 24. i.e. character.val == '"'
Line 28: if (row[character.index + 1] == character.val) added !quoteIsEscaped to escape 3 consecutive quotes.
public static IEnumerable<string> SplitRow(string row, char delimiter = ',')
{
var currentString = new StringBuilder();
var inQuotes = false;
var quoteIsEscaped = false; //Store when a quote has been escaped.
row = string.Format("{0}{1}", row, delimiter); //We add new cells at the delimiter, so append one for the parser.
foreach (var character in row.Select((val, index) => new {val, index}))
{
if (character.val == delimiter) //We hit a delimiter character...
{
if (!inQuotes) //Are we inside quotes? If not, we've hit the end of a cell value.
{
//Console.WriteLine(currentString);
yield return currentString.ToString();
currentString.Clear();
}
else
{
currentString.Append(character.val);
}
} else {
if (character.val != ' ')
{
if(character.val == '"') //If we've hit a quote character...
{
if(character.val == '"' && inQuotes) //Does it appear to be a closing quote?
{
if (row[character.index + 1] == character.val && !quoteIsEscaped) //If the character afterwards is also a quote, this is to escape that (not a closing quote).
{
quoteIsEscaped = true; //Flag that we are escaped for the next character. Don't add the escaping quote.
}
else if (quoteIsEscaped)
{
quoteIsEscaped = false; //This is an escaped quote. Add it and revert quoteIsEscaped to false.
currentString.Append(character.val);
}
else
{
inQuotes = false;
}
}
else
{
if (!inQuotes)
{
inQuotes = true;
}
else
{
currentString.Append(character.val); //...It's a quote inside a quote.
}
}
}
else
{
currentString.Append(character.val);
}
}
else
{
if (!string.IsNullOrWhiteSpace(currentString.ToString())) //Append only if not new cell
{
currentString.Append(character.val);
}
}
}
}
}
For Jay's answer, if you use a 2nd boolean then you can have nested double-quotes inside single-quotes and vice-versa.
private string[] splitString(string stringToSplit)
{
char[] characters = stringToSplit.ToCharArray();
List<string> returnValueList = new List<string>();
string tempString = "";
bool blockUntilEndQuote = false;
bool blockUntilEndQuote2 = false;
int characterCount = 0;
foreach (char character in characters)
{
characterCount = characterCount + 1;
if (character == '"' && !blockUntilEndQuote2)
{
if (blockUntilEndQuote == false)
{
blockUntilEndQuote = true;
}
else if (blockUntilEndQuote == true)
{
blockUntilEndQuote = false;
}
}
if (character == '\'' && !blockUntilEndQuote)
{
if (blockUntilEndQuote2 == false)
{
blockUntilEndQuote2 = true;
}
else if (blockUntilEndQuote2 == true)
{
blockUntilEndQuote2 = false;
}
}
if (character != ',')
{
tempString = tempString + character;
}
else if (character == ',' && (blockUntilEndQuote == true || blockUntilEndQuote2 == true))
{
tempString = tempString + character;
}
else
{
returnValueList.Add(tempString);
tempString = "";
}
if (characterCount == characters.Length)
{
returnValueList.Add(tempString);
tempString = "";
}
}
string[] returnValue = returnValueList.ToArray();
return returnValue;
}
The original version
Currently I use the following regex:
public static Regex regexCSVSplit = new Regex(#"(?x:(
(?<FULL>
(^|[,;\t\r\n])\s*
( (?<QUODAT> (?<QUO>[""'])(?<DAT>([^,;\t\r\n]|(?<!\k<QUO>\s*)[,;\t\r\n])*)\k<QUO>) |
(?<QUODAT> (?<DAT> [^""',;\s\r\n]* )) )
(?=\s*([,;\t\r\n]|$))
) |
(?<FULL>
(^|[\s\t\r\n])
( (?<QUODAT> (?<QUO>[""'])(?<DAT> [^""',;\s\t\r\n]* )\k<QUO>) |
(?<QUODAT> (?<DAT> [^""',;\s\t\r\n]* )) )
(?=[,;\s\t\r\n]|$)
)
))", RegexOptions.Compiled);
This solution can handle pretty chaotic cases too like below:
This is how to feed the result into an array:
var data = regexCSVSplit.Matches(line_to_process).Cast<Match>().
Select(x => x.Groups["DAT"].Value).ToArray();
See this example in action HERE
Note: The regular expression contains two set of <FULL> block and each of them contains two <QUODAT> block separated by "or" (|). Depending on your task you may only need one of them.
Note: That this regular expression gives us one string array, and works on single line with or without <carrier return> and/or <line feed>.
Simplified version
The following regular expression will already cover many complex cases:
public static Regex regexCSVSplit = new Regex(#"(?x:(
(?<FULL>
(^|[,;\t\r\n])\s*
(?<QUODAT> (?<QUO>[""'])(?<DAT>([^,;\t\r\n]|(?<!\k<QUO>\s*)[,;\t\r\n])*)\k<QUO>)
(?=\s*([,;\t\r\n]|$))
)
))", RegexOptions.Compiled);
See this example in action: HERE
It can process complex, easy and empty items too:
This is how to feed the result into an array:
var data = regexCSVSplit.Matches(line_to_process).Cast<Match>().
Select(x => x.Groups["DAT"].Value).ToArray();
The main rule here is that every item may contain anything but the <quotation mark><separators><comma> sequence AND each item shall being and end with the same <quotation mark>.
<quotation mark>: <">, <'>
<comma>: <,>, <;>, <tab>, <carrier return>, <line feed>
Edit notes: I added some more explanation to make it easier to understand and replaces the text "CO" with "QUO".
Try this:
string s = #"111,222,""33,44,55"",666,""77,88"",""99""";
List<string> result = new List<string>();
var splitted = s.Split('"').ToList<string>();
splitted.RemoveAll(x => x == ",");
foreach (var it in splitted)
{
if (it.StartsWith(",") || it.EndsWith(","))
{
var tmp = it.TrimEnd(',').TrimStart(',');
result.AddRange(tmp.Split(','));
}
else
{
if(!string.IsNullOrEmpty(it)) result.Add(it);
}
}
//Results:
foreach (var it in result)
{
Console.WriteLine(it);
}
I know I'm a bit late to this, but for searches, here is how I did what you are asking about in C sharp
private string[] splitString(string stringToSplit)
{
char[] characters = stringToSplit.ToCharArray();
List<string> returnValueList = new List<string>();
string tempString = "";
bool blockUntilEndQuote = false;
int characterCount = 0;
foreach (char character in characters)
{
characterCount = characterCount + 1;
if (character == '"')
{
if (blockUntilEndQuote == false)
{
blockUntilEndQuote = true;
}
else if (blockUntilEndQuote == true)
{
blockUntilEndQuote = false;
}
}
if (character != ',')
{
tempString = tempString + character;
}
else if (character == ',' && blockUntilEndQuote == true)
{
tempString = tempString + character;
}
else
{
returnValueList.Add(tempString);
tempString = "";
}
if (characterCount == characters.Length)
{
returnValueList.Add(tempString);
tempString = "";
}
}
string[] returnValue = returnValueList.ToArray();
return returnValue;
}
Don't reinvent a CSV parser, try FileHelpers.
I needed something a little more robust, so I took from here and created this... This solution is a little less elegant and a little more verbose, but in my testing (with a 1,000,000 row sample), I found this to be 2 to 3 times faster. Plus it handles non-escaped, embedded quotes. I used string delimiter and qualifiers instead of chars because of the requirements of my solution. I found it more difficult than I expected to find a good, generic CSV parser so I hope this parsing algorithm can help someone.
public static string[] SplitRow(string record, string delimiter, string qualifier, bool trimData)
{
// In-Line for example, but I implemented as string extender in production code
Func <string, int, int> IndexOfNextNonWhiteSpaceChar = delegate (string source, int startIndex)
{
if (startIndex >= 0)
{
if (source != null)
{
for (int i = startIndex; i < source.Length; i++)
{
if (!char.IsWhiteSpace(source[i]))
{
return i;
}
}
}
}
return -1;
};
var results = new List<string>();
var result = new StringBuilder();
var inQualifier = false;
var inField = false;
// We add new columns at the delimiter, so append one for the parser.
var row = $"{record}{delimiter}";
for (var idx = 0; idx < row.Length; idx++)
{
// A delimiter character...
if (row[idx]== delimiter[0])
{
// Are we inside qualifier? If not, we've hit the end of a column value.
if (!inQualifier)
{
results.Add(trimData ? result.ToString().Trim() : result.ToString());
result.Clear();
inField = false;
}
else
{
result.Append(row[idx]);
}
}
// NOT a delimiter character...
else
{
// ...Not a space character
if (row[idx] != ' ')
{
// A qualifier character...
if (row[idx] == qualifier[0])
{
// Qualifier is closing qualifier...
if (inQualifier && row[IndexOfNextNonWhiteSpaceChar(row, idx + 1)] == delimiter[0])
{
inQualifier = false;
continue;
}
else
{
// ...Qualifier is opening qualifier
if (!inQualifier)
{
inQualifier = true;
}
// ...It's a qualifier inside a qualifier.
else
{
inField = true;
result.Append(row[idx]);
}
}
}
// Not a qualifier character...
else
{
result.Append(row[idx]);
inField = true;
}
}
// ...A space character
else
{
if (inQualifier || inField)
{
result.Append(row[idx]);
}
}
}
}
return results.ToArray<string>();
}
Some test code:
//var input = "111,222,\"33,44,55\",666,\"77,88\",\"99\"";
var input =
"111, 222, \"99\",\"33,44,55\" , \"666 \"mark of a man\"\", \" spaces \"77,88\" \"";
Console.WriteLine("Split with trim");
Console.WriteLine("---------------");
var result = SplitRow(input, ",", "\"", true);
foreach (var r in result)
{
Console.WriteLine(r);
}
Console.WriteLine("");
// Split 2
Console.WriteLine("Split with no trim");
Console.WriteLine("------------------");
var result2 = SplitRow(input, ",", "\"", false);
foreach (var r in result2)
{
Console.WriteLine(r);
}
Console.WriteLine("");
// Time Trial 1
Console.WriteLine("Experimental Process (1,000,000) iterations");
Console.WriteLine("-------------------------------------------");
watch = Stopwatch.StartNew();
for (var i = 0; i < 1000000; i++)
{
var x1 = SplitRow(input, ",", "\"", false);
}
watch.Stop();
elapsedMs = watch.ElapsedMilliseconds;
Console.WriteLine($"Total Process Time: {string.Format("{0:0.###}", elapsedMs / 1000.0)} Seconds");
Console.WriteLine("");
Results
Split with trim
---------------
111
222
99
33,44,55
666 "mark of a man"
spaces "77,88"
Split with no trim
------------------
111
222
99
33,44,55
666 "mark of a man"
spaces "77,88"
Original Process (1,000,000) iterations
-------------------------------
Total Process Time: 7.538 Seconds
Experimental Process (1,000,000) iterations
--------------------------------------------
Total Process Time: 3.363 Seconds
I once had to do something similar and in the end I got stuck with Regular Expressions. The inability for Regex to have state makes it pretty tricky - I just ended up writing a simple little parser.
If you're doing CSV parsing you should just stick to using a CSV parser - don't reinvent the wheel.
Here is my fastest implementation based upon string raw pointer manipulation:
string[] FastSplit(string sText, char? cSeparator = null, char? cQuotes = null)
{
string[] oTokens;
if (null == cSeparator)
{
cSeparator = DEFAULT_PARSEFIELDS_SEPARATOR;
}
if (null == cQuotes)
{
cQuotes = DEFAULT_PARSEFIELDS_QUOTE;
}
unsafe
{
fixed (char* lpText = sText)
{
#region Fast array estimatation
char* lpCurrent = lpText;
int nEstimatedSize = 0;
while (0 != *lpCurrent)
{
if (cSeparator == *lpCurrent)
{
nEstimatedSize++;
}
lpCurrent++;
}
nEstimatedSize++; // Add EOL char(s)
string[] oEstimatedTokens = new string[nEstimatedSize];
#endregion
#region Parsing
char[] oBuffer = new char[sText.Length];
int nIndex = 0;
int nTokens = 0;
lpCurrent = lpText;
while (0 != *lpCurrent)
{
if (cQuotes == *lpCurrent)
{
// Quotes parsing
lpCurrent++; // Skip quote
nIndex = 0; // Reset buffer
while (
(0 != *lpCurrent)
&& (cQuotes != *lpCurrent)
)
{
oBuffer[nIndex] = *lpCurrent; // Store char
lpCurrent++; // Move source cursor
nIndex++; // Move target cursor
}
}
else if (cSeparator == *lpCurrent)
{
// Separator char parsing
oEstimatedTokens[nTokens++] = new string(oBuffer, 0, nIndex); // Store token
nIndex = 0; // Skip separator and Reset buffer
}
else
{
// Content parsing
oBuffer[nIndex] = *lpCurrent; // Store char
nIndex++; // Move target cursor
}
lpCurrent++; // Move source cursor
}
// Recover pending buffer
if (nIndex > 0)
{
// Store token
oEstimatedTokens[nTokens++] = new string(oBuffer, 0, nIndex);
}
// Build final tokens list
if (nTokens == nEstimatedSize)
{
oTokens = oEstimatedTokens;
}
else
{
oTokens = new string[nTokens];
Array.Copy(oEstimatedTokens, 0, oTokens, 0, nTokens);
}
#endregion
}
}
// Epilogue
return oTokens;
}
Try this
private string[] GetCommaSeperatedWords(string sep, string line)
{
List<string> list = new List<string>();
StringBuilder word = new StringBuilder();
int doubleQuoteCount = 0;
for (int i = 0; i < line.Length; i++)
{
string chr = line[i].ToString();
if (chr == "\"")
{
if (doubleQuoteCount == 0)
doubleQuoteCount++;
else
doubleQuoteCount--;
continue;
}
if (chr == sep && doubleQuoteCount == 0)
{
list.Add(word.ToString());
word = new StringBuilder();
continue;
}
word.Append(chr);
}
list.Add(word.ToString());
return list.ToArray();
}
This is Chad's answer rewritten with state based logic. His answered failed for me when it came across """BRAD""" as a field. That should return "BRAD" but it just ate up all the remaining fields. When I tried to debug it I just ended up rewriting it as state based logic:
enum SplitState { s_begin, s_infield, s_inquotefield, s_foundquoteinfield };
public static IEnumerable<string> SplitRow(string row, char delimiter = ',')
{
var currentString = new StringBuilder();
SplitState state = SplitState.s_begin;
row = string.Format("{0}{1}", row, delimiter); //We add new cells at the delimiter, so append one for the parser.
foreach (var character in row.Select((val, index) => new { val, index }))
{
//Console.WriteLine("character = " + character.val + " state = " + state);
switch (state)
{
case SplitState.s_begin:
if (character.val == delimiter)
{
/* empty field */
yield return currentString.ToString();
currentString.Clear();
} else if (character.val == '"')
{
state = SplitState.s_inquotefield;
} else
{
currentString.Append(character.val);
state = SplitState.s_infield;
}
break;
case SplitState.s_infield:
if (character.val == delimiter)
{
/* field with data */
yield return currentString.ToString();
state = SplitState.s_begin;
currentString.Clear();
} else
{
currentString.Append(character.val);
}
break;
case SplitState.s_inquotefield:
if (character.val == '"')
{
// could be end of field, or escaped quote.
state = SplitState.s_foundquoteinfield;
} else
{
currentString.Append(character.val);
}
break;
case SplitState.s_foundquoteinfield:
if (character.val == '"')
{
// found escaped quote.
currentString.Append(character.val);
state = SplitState.s_inquotefield;
}
else if (character.val == delimiter)
{
// must have been last quote so we must find delimiter
yield return currentString.ToString();
state = SplitState.s_begin;
currentString.Clear();
}
else
{
throw new Exception("Quoted field not terminated.");
}
break;
default:
throw new Exception("unknown state:" + state);
}
}
//Console.WriteLine("currentstring = " + currentString.ToString());
}
This is a lot more lines of code than the other solutions, but it is easy to modify to add edge cases.
so I have this code. I need to generate a for loop that checks all the characters in the string and checks if they are all valid(So numbers from 0->7). But I don't know how to write it, I tried something but it didn't work. Here are the examples:user enters: 77, code works, user enters 99, code doesn't work, user enters 5., code doesn't work, etc..
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace NALOGA1
{
class Program
{
static string decToOct(int stevilo)//v mojon primere 7
{
string izhod = "";
//7>0 DRŽI
while (stevilo > 0)
{
//izhodi se dodeli ostanek deljenja z 8 keri se spremeni v string
izhod = (stevilo % 8) + izhod;
//7/8;
stevilo /= 8;
}
return izhod;
}
static int Octtodesetisko(string stevilo)
{
double vsota = 0;
for (int i = stevilo.Length - 1; i >= 0; i--)
{
int stevka = stevilo[i] - '0';
vsota += (stevka * Math.Pow(8, i));
}
return (int)vsota;
}
static void Main(string[] args)
{
//3 podprogram-in progress
string prvastevilka = Console.ReadLine();
int prvasprememba = Int32.Parse(prvastevilka);
if (prvasprememba > 0)
{
Console.WriteLine(decToOct(prvasprememba));
}
else
{
Console.WriteLine("Napaka");
}
string drugastevilka = Console.ReadLine();
int drugasprememba = Octtodesetisko(drugastevilka);
foreach (char znak in drugastevilka)
{
if(znak!=1 || znak!=2 || znak!=3 || znak!=4 || znak!=5 || znak!=6 || znak!=7)
{
Console.WriteLine("Napaka");
}
else
{
Console.WriteLine("dela :D");
}
}
Console.ReadKey();
}
}
}
Personally, I would take advantage of the LINQ Enumerable.All method to express this in a very concise and readable way:
if (str.Any() && str.All(c => c >= '0' && c <= '7'))
{
Console.WriteLine("good");
}
else
{
Console.WriteLine("bad");
}
EDIT: No LINQ
It's not hard to translate what the LINQ Enumerable.All method does to a normal loop. It's just more verbose:
bool isValid = true;
foreach (char c in str)
{
if (c < '0' || c > '7')
{
isValid = false;
break;
}
}
if (str.Length != 0 && isValid)
{
Console.WriteLine("good");
}
else
{
Console.WriteLine("bad");
}
Firstly, there seems to be a mistake in the line
if(znak!=1 || znak!=2 || znak!=3 || znak!=4 || znak!=5 || znak!=6 || znak!=7)
I guess it should read
if(znak!='1' || znak!='2' || znak!='3' || znak!='4' || znak!='5' || znak!='6' || znak!='7')
which should be compressed to
if (znak >= '0' && znak <= '7')
You can use linq instead of the for loop here like this:
if (drugastevilka.All(c => c >= '0' && c <= '7')
Console.WriteLine("dela :D");
else
Console.WriteLine("Napaka");
But the best solution is probably to use a regular expression:
Regex regex = new Regex("^[0-7]+$");
if (regex.IsMatch(drugastevilka))
Console.WriteLine("dela :D");
else
Console.WriteLine("Napaka");
Edit: the linq solution shown accepts empty strings, the regex (as shown) needs at least 1 character. Exchange the + with a * and it will accept empty strings, too. But I don't think you want to accept empty strings.
You are messing up with the datatype
Can you try with below code
static string decToOct(int stevilo)//v mojon primere 7
{
int izhod = 0;
//7>0 DRŽI
while (stevilo > 0)
{
//izhodi se dodeli ostanek deljenja z 8 keri se spremeni v string
izhod = (stevilo % 8) + izhod;
//7/8;
stevilo /= 8;
}
return (izhod.ToString());
}
What about something like this?
class Program
{
static void Main(string[] args)
{
string someString = "1234567";
string someOtherString = "1287631";
string anotherString = "123A6F2";
Console.WriteLine(IsValidString(someString));
Console.WriteLine(IsValidString(someOtherString));
Console.WriteLine(IsValidString(anotherString));
Console.ReadLine();
}
public static bool IsValidString(string str)
{
bool isValid = true;
char[] splitString = str.ToCharArray(); //get an array of each character
for (int i = 0; i < splitString.Length; i++)
{
try
{
double number = Char.GetNumericValue(splitString[i]); //try to convert the character to a double (GetNumericValue returns a double)
if (number < 0 || number > 7) //we get here if the character is an int, then we check for 0-7
{
isValid = false; //if the character is invalid, we're done.
break;
}
}
catch (Exception) //this will hit if we try to convert a non-integer character.
{
isValid = false;
break;
}
}
return isValid;
}
}
IsValidString() takes a string, converts it to a Char array, then checks each value as such:
Get the numeric value
Check if the value is between 0-7
GetNumericValue will fail on a non-integer character, so we wrap it in a try/catch - if we hit an exception we know that isValid = false, so we break.
If we get a valid number, and it's not between 0-7 we also know that isValid = false, so we break.
If we make it all the way through the list, the string is valid.
The sample given above returns:
IsValidString(someString) == true
IsValidString(someOtherString) == false
IsValidString(anotherString) == false
I am working on a logic that decreases the value of an alphanumeric List<char>. For example, A10 becomes A9, BBA becomes BAZ, 123 becomes 122. And yes, if the value entered is the last one(like A or 0), then I should return -
An additional overhead is that there is a List<char> variable which is maintained by the user. It has characters which are to be skipped. For example, if the list contains A in it, the value GHB should become GGZ and not GHA.
The base of this logic is a very simple usage of decreasing the char but with these conditions, I am finding it very difficult.
My project is in Silverlight, the language is C#. Following is my code that I have been trying to do in the 3 methods:
List<char> lstGetDecrName(List<char> lstVal)//entry point of the value that returns decreased value
{
List<char> lstTmp = lstVal;
subCheckEmpty(ref lstTmp);
switch (lstTmp.Count)
{
case 0:
lstTmp.Add('-');
return lstTmp;
case 1:
if (lstTmp[0] == '-')
{
return lstTmp;
}
break;
case 2:
if (lstTmp[1] == '0')
{
if (lstTmp[0] == '1')
{
lstTmp.Clear();
lstTmp.Add('9');
return lstTmp;
}
if (lstTmp[0] == 'A')
{
lstTmp.Clear();
lstTmp.Add('-');
return lstTmp;
}
}
if (lstTmp[1] == 'A')
{
if (lstTmp[0] == 'A')
{
lstTmp.Clear();
lstTmp.Add('Z');
return lstTmp;
}
}
break;
}
return lstGetDecrValue(lstTmp,lstVal);
}
List<char> lstGetDecrValue(List<char> lstTmp,List<char> lstVal)
{
List<char> lstValue = new List<char>();
switch (lstTmp.Last())
{
case 'A':
lstValue = lstGetDecrTemp('Z', lstTmp, lstVal);
break;
case 'a':
lstValue = lstGetDecrTemp('z', lstTmp, lstVal);
break;
case '0':
lstValue = lstGetDecrTemp('9', lstTmp, lstVal);
break;
default:
char tmp = (char)(lstTmp.Last() - 1);
lstTmp.RemoveAt(lstTmp.Count - 1);
lstTmp.Add(tmp);
lstValue = lstTmp;
break;
}
return lstValue;
}
List<char> lstGetDecrTemp(char chrTemp, List<char> lstTmp, List<char> lstVal)//shifting places eg unit to ten,etc.
{
if (lstTmp.Count == 1)
{
lstTmp.Clear();
lstTmp.Add('-');
return lstTmp;
}
lstTmp.RemoveAt(lstTmp.Count - 1);
lstVal = lstGetDecrName(lstTmp);
lstVal.Insert(lstVal.Count, chrTemp);
return lstVal;
}
I seriously need help for this. Please help me out crack through this.
The problem you are trying to solve is actually how to decrement discreet sections of a sequence of characters, each with it's own counting system, where each section is separated by a change between Alpha and Numeric. The rest of the problem is easy once you identify this.
The skipping of unwanted characters is simply a matter of repeating the decrement if you get an unwanted character in the result.
One difficultly is the ambiguous definition of the sequences. e.g. what to do when you get down to say A00, what is next? "A" or "-". For the sake of argument I am assuming a practical implementation based loosely on Excel cell names (i.e. each section operates independently of the others).
The code below does 95% of what you wanted, however there is a bug in the exclusions code. e.g. "ABB" becomes "AAY". I feel the exclusions need to be applied at a higher level (e.g. repeat decrement until no character is in the exclusions list), but I don't have time to finish it now. Also it is resulting in a blank string when it counts down to nothing, rather than the "-" you wanted, but that is trivial to add at the end of the process.
Part 1 (divide the problem into sections):
public static string DecreaseName( string name, string exclusions )
{
if (string.IsNullOrEmpty(name))
{
return name;
}
// Split the problem into sections (reverse order)
List<StringBuilder> sections = new List<StringBuilder>();
StringBuilder result = new StringBuilder(name.Length);
bool isNumeric = char.IsNumber(name[0]);
StringBuilder sb = new StringBuilder();
sections.Add(sb);
foreach (char c in name)
{
// If we change between alpha and number, start new string.
if (char.IsNumber(c) != isNumeric)
{
isNumeric = char.IsNumber(c);
sb = new StringBuilder();
sections.Insert(0, sb);
}
sb.Append(c);
}
// Now process each section
bool cascadeToNext = true;
foreach (StringBuilder section in sections)
{
if (cascadeToNext)
{
result.Insert(0, DecrementString(section, exclusions, out cascadeToNext));
}
else
{
result.Insert(0, section);
}
}
return result.ToString().Replace(" ", "");
}
Part2 (decrement a given string):
private static string DecrementString(StringBuilder section, string exclusions, out bool cascadeToNext)
{
bool exclusionsExist = false;
do
{
exclusionsExist = false;
cascadeToNext = true;
// Process characters in reverse
for (int i = section.Length - 1; i >= 0 && cascadeToNext; i--)
{
char c = section[i];
switch (c)
{
case 'A':
c = (i > 0) ? 'Z' : ' ';
cascadeToNext = (i > 0);
break;
case 'a':
c = (i > 0) ? 'z' : ' ';
cascadeToNext = (i > 0);
break;
case '0':
c = (i > 0) ? '9' : ' ';
cascadeToNext = (i > 0);
break;
case ' ':
cascadeToNext = false;
break;
default:
c = (char)(((int)c) - 1);
if (i == 0 && c == '0')
{
c = ' ';
}
cascadeToNext = false;
break;
}
section[i] = c;
if (exclusions.Contains(c.ToString()))
{
exclusionsExist = true;
}
}
} while (exclusionsExist);
return section.ToString();
}
The dividing can of course be done more efficiently, just passing start and end indexes to the DecrementString, but this is easier to write & follow and not much slower in practical terms.
do a check if its a number if so then do a minus math of the number, if its a string then change it to char codes and then the char code minus 1
I couldn't stop thinking about this yesterday, so here's an idea. Note, this is just pseudo-code, and not tested, but I think the idea is valid and should work (with a few modifications).
The main point is to define your "alphabet" directly, and specify which characters in it are illegal and should be skipped, then use a list or array of positions in this alphabet to define the word you start with.
I can't spend any more time on this right now, but please let me know if you decide to use it and get it to work!
string[] alphabet = {a, b, c, d, e};
string[] illegal = {c, d};
public string ReduceString(string s){
// Create a list of the alphabet-positions for each letter:
int[] positionList = s.getCharsAsPosNrsInAlphabet();
int[] reducedPositionList = ReduceChar(positionList, positionList.length);
string result = "";
foreach(int pos in reducedPositionList){
result += alphabet[pos];
}
return result;
}
public string ReduceChar(string[] positionList, posToReduce){
int reducedCharPosition = ReduceToNextLegalChar(positionList[posToReduce]);
// put reduced char back in place:
positionList[posToReduce] = reducedCharPosition;
if(reducedCharPosition < 0){
if(posToReduce <= 0){
// Reached the end, reduced everything, return empty array!:
return new string[]();
}
// move to back of alphabet again (ie, like the 9 in "11 - 2 = 09"):
reducedCharPosition += alphabet.length;
// Recur and reduce next position (ie, like the 0 in "11 - 2 = 09"):
return ReduceChar(positionList, posToReduce-1);
}
return positionList;
}
public int ReduceToNextLegalChar(int pos){
int nextPos = pos--;
return (isLegalChar(nextPos) ? nextPos : ReduceToNextLegalChar(nextPos));
}
public boolean IsLegalChar(int pos){
return (! illegal.contains(alphabet[pos]));
}
enter code here
Without writing all your code for you, here's a suggestion as to how you can break this down:
char DecrementAlphaNumericChar(char input, out bool hadToWrap)
{
if (input == 'A')
{
hadToWrap = true;
return 'Z';
}
else if (input == '0')
{
hadToWrap = true;
return '9';
}
else if ((input > 'A' && input <= 'Z') || (input > '0' && input <= '9'))
{
hadToWrap = false;
return (char)((int)input - 1);
}
throw new ArgumentException(
"Characters must be digits or capital letters",
"input");
}
char DecrementAvoidingProhibited(
char input, List<char> prohibited, out bool hadToWrap)
{
var potential = DecrementAlphaNumericChar(input, out hadToWrap);
while (prohibited.Contains(potential))
{
bool temp;
potential = DecrementAlphaNumericChar(potential, out temp);
if (potential == input)
{
throw new ArgumentException(
"A whole class of characters was prohibited",
"prohibited");
}
hadToWrap |= temp;
}
return potential;
}
string DecrementString(string input, List<char> prohibited)
{
char[] chrs = input.ToCharArray();
for (int i = chrs.Length - 1; i >= 0; i--)
{
bool wrapped;
chrs[i] = DecrementAvoidingProhibited(
chrs[i], prohibited, out wrapped);
if (!wrapped)
return new string(chrs);
}
return "-";
}
The only issue here is that it will reduce e.g. A10 to A09 not A9. I actually prefer this myself, but it should be simple to write a final pass that removes the extra zeroes.
For a little more performance, replace the List<char>s with Hashset<char>s, they should allow a faster Contains lookup.
I found solution to my own answer with some other workarounds.
The calling function:
MyFunction()
{
//stuff I do before
strValue = lstGetDecrName(strValue.ToList());//decrease value here
if (strValue.Contains('-'))
{
strValue = "-";
}
//stuff I do after
}
In all there are 4 functions. 2 Main functions and 2 helper functions.
List<char> lstGetDecrName(List<char> lstVal)//entry point, returns decreased value
{
if (lstVal.Contains('-'))
{
return "-".ToList();
}
List<char> lstTmp = lstVal;
subCheckEmpty(ref lstTmp);
switch (lstTmp.Count)
{
case 0:
lstTmp.Add('-');
return lstTmp;
case 1:
if (lstTmp[0] == '-')
{
return lstTmp;
}
break;
case 2:
if (lstTmp[1] == '0')
{
if (lstTmp[0] == '1')
{
lstTmp.Clear();
lstTmp.Add('9');
return lstTmp;
}
if (lstTmp[0] == 'A')
{
lstTmp.Clear();
lstTmp.Add('-');
return lstTmp;
}
}
if (lstTmp[1] == 'A')
{
if (lstTmp[0] == 'A')
{
lstTmp.Clear();
lstTmp.Add('Z');
return lstTmp;
}
}
break;
}
List<char> lstValue = new List<char>();
switch (lstTmp.Last())
{
case 'A':
lstValue = lstGetDecrTemp('Z', lstTmp, lstVal);
break;
case 'a':
lstValue = lstGetDecrTemp('z', lstTmp, lstVal);
break;
case '0':
lstValue = lstGetDecrTemp('9', lstTmp, lstVal);
break;
default:
char tmp = (char)(lstTmp.Last() - 1);
lstTmp.RemoveAt(lstTmp.Count - 1);
lstTmp.Add(tmp);
subCheckEmpty(ref lstTmp);
lstValue = lstTmp;
break;
}
lstGetDecrSkipValue(lstValue);
return lstValue;
}
List<char> lstGetDecrSkipValue(List<char> lstValue)
{
bool blnSkip = false;
foreach (char tmpChar in lstValue)
{
if (lstChars.Contains(tmpChar))
{
blnSkip = true;
break;
}
}
if (blnSkip)
{
lstValue = lstGetDecrName(lstValue);
}
return lstValue;
}
void subCheckEmpty(ref List<char> lstTmp)
{
bool blnFirst = true;
int i = -1;
foreach (char tmpChar in lstTmp)
{
if (char.IsDigit(tmpChar) && blnFirst)
{
i = tmpChar == '0' ? lstTmp.IndexOf(tmpChar) : -1;
if (tmpChar == '0')
{
i = lstTmp.IndexOf(tmpChar);
}
blnFirst = false;
}
}
if (!blnFirst && i != -1)
{
lstTmp.RemoveAt(i);
subCheckEmpty(ref lstTmp);
}
}
List<char> lstGetDecrTemp(char chrTemp, List<char> lstTmp, List<char> lstVal)//shifting places eg unit to ten,etc.
{
if (lstTmp.Count == 1)
{
lstTmp.Clear();
lstTmp.Add('-');
return lstTmp;
}
lstTmp.RemoveAt(lstTmp.Count - 1);
lstVal = lstGetDecrName(lstTmp);
lstVal.Insert(lstVal.Count, chrTemp);
subCheckEmpty(ref lstVal);
return lstVal;
}