I'm trying to read the contents of a CSV file into different variables in order to send them to a web service. It has been working fine, but suddenly today I got an exception:
index was outside the bounds of the array:
what Did I do wrong?
// Reads every file in the source directory and splits each line into
// comma-separated fields that are copied into named variables.
String sourceDir = #"\\198.0.0.4\e$\Globus\LIVE\bnk.run\URA.BP\WEBOUT\";
// Process the list of files found in the directory.
string[] fileEntries = Directory.GetFiles(sourceDir);
foreach (string fileName2 in fileEntries)
{
// read values
// NOTE(review): the reader is never closed or disposed in this snippet.
StreamReader st = new StreamReader(fileName2);
while (st.Peek() >= 0)
{
String report1 = st.ReadLine();
String[] columns = report1.Split(','); //split columns
// NOTE: nothing checks columns.Length before indexing. Any line that yields
// fewer than 8 fields (for example a blank line, which splits into a single
// empty field) makes one of the indexers below throw IndexOutOfRangeException.
String prnout = columns[0];
String tinout = columns[1];
String amtout = columns[2];
String valdate = columns[3];
String paydate = columns[4];
String status = columns[5];
String branch = columns[6];
String reference = columns[7];
}
}
It's hard to guess without even seeing the .csv file, but my first guess would be that at least one row doesn't have 8 columns.
It would be easier if you could show the original .csv file, and tell us where the exception pops.
edit: If you think the data is alright, I'd suggest you debugging and see what the split call returns in Visual Studio. That might help
edit2: And since you're doing that processing in a loop, make sure each row has at least 8 columns.
My money is on bad data file. If that is the only thing in the equation that has changed (aka you haven't made any code changes) then that's pretty much your only option.
If your data file isn't too long post it here and we can tell you for sure.
You can add something like below to check for invalid column lengths:
// Reads the file line by line and skips any row that does not contain
// at least the 8 fields the rest of the loop body indexes into.
while (st.Peek() >= 0)
{
String report1 = st.ReadLine();
String[] columns = report1.Split(','); //split columns
if(columns.Length < 8)
{
//Log something useful, throw an exception, whatever.
//You have the option to quietly note that there was a problem and
//continue on processing the rest of the file if you want.
continue;
}
//working with columns below
}
Just for sanity's sake, I combined all the various notes written here. This code is a bit cleaner and has some validation in it.
Try this:
// UNC share that holds the CSV files to process.
string dir = @"\\198.0.0.4\e$\Globus\LIVE\bnk.run\URA.BP\WEBOUT\";

// Number of comma-separated fields a well-formed row must contain.
const int ExpectedColumnCount = 8;

foreach (string fileName2 in Directory.GetFiles(dir))
{
    // The using block releases the file handle even if processing a row throws.
    using (StreamReader sr = new StreamReader(fileName2))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Skip blank lines, e.g. an extra end-of-line at the end of the file.
            if (String.IsNullOrEmpty(line))
            {
                continue;
            }

            string[] columns = line.Split(',');

            // Rows with the wrong number of fields are skipped instead of
            // raising IndexOutOfRangeException; log them here if needed.
            if (columns.Length != ExpectedColumnCount)
            {
                continue;
            }

            string prnout = columns[0];
            string tinout = columns[1];
            string amtout = columns[2];
            string valdate = columns[3];
            string paydate = columns[4];
            string status = columns[5];
            string branch = columns[6];
            string reference = columns[7];
        }
    }
}
EDIT: As some other users have commented, the CSV format also accepts text qualifiers, which usually means the double quote symbol ("). For example, a text qualified line may look like this:
user,"Hello!",123.23,"$123,123.12",and so on,
Writing CSV parsing code is a little more complicated when you have a fully formatted file like this. Over the years I've been parsing improperly formatted CSV files, I've worked up a standard code script that passes virtually all unit tests, but it's a pain to explain.
/// <summary>
/// Splits one line of delimited text into fields, honouring text-qualified (quoted) fields.
/// Inside a qualified field, a doubled qualifier is emitted as a single qualifier character.
/// If the line yields only one field and <paramref name="delimiter"/> is not
/// DEFAULT_TAB_DELIMITER, the line is re-parsed via ParseLine (defined elsewhere) with the
/// tab delimiter, and that result is used when it produces more fields.
/// </summary>
/// <param name="s">The line of text to parse.</param>
/// <param name="delimiter">The character that separates fields.</param>
/// <param name="text_qualifier">The character that opens and closes a qualified field.</param>
/// <param name="array">Receives the parsed fields; assigned on every return path.</param>
/// <returns>
/// <c>false</c> when a text qualifier was opened but never closed on this line; otherwise <c>true</c>.
/// The flag always describes the parse with <paramref name="delimiter"/>, even when the
/// tab-delimited fallback result is returned.
/// </returns>
public static bool TryParseLine(string s, char delimiter, char text_qualifier, out string[] array)
{
bool success = true;
List<string> list = new List<string>();
StringBuilder work = new StringBuilder();
for (int i = 0; i < s.Length; i++) {
char c = s[i];
// If we are starting a new field (nothing accumulated yet), is this field text qualified?
if ((c == text_qualifier) && (work.Length == 0)) {
int p2;
while (true) {
p2 = s.IndexOf(text_qualifier, i + 1);
// No closing qualifier on this line: keep the rest of the line as the
// field's content, stop scanning, and report the failure to the caller.
if (p2 < 0) {
work.Append(s.Substring(i + 1));
i = s.Length;
success = false;
break;
}
// Append the qualified text found between position i and the qualifier at p2
work.Append(s.Substring(i + 1, p2 - i - 1));
i = p2;
// A qualifier immediately followed by another one is an escaped qualifier:
// emit one qualifier character and keep scanning the same field.
if (((p2 + 1) < s.Length) && (s[p2 + 1] == text_qualifier)) {
work.Append(text_qualifier);
i++;
// otherwise, this is a single qualifier, we're done
} else {
break;
}
}
// Does this start a new field?
} else if (c == delimiter) {
list.Add(work.ToString());
work.Length = 0;
// Test for special case: when the user has written a casual comma, space, and text qualifier, skip the space
// The outer bounds check guarantees that s[i + 1] and s[i + 2] exist before they are read
// e.g. "bob", "mary", "bill"
if (i + 2 <= s.Length - 1) {
if (s[i + 1].Equals(' ') && s[i + 2].Equals(text_qualifier)) {
i++;
}
}
} else {
work.Append(c);
}
}
// Flush the final field (the loop only adds a field when it meets a delimiter)
list.Add(work.ToString());
// If only a single field was found, and it's possible that this might be a tab delimited list, try that before giving up
if (list.Count == 1 && delimiter != DEFAULT_TAB_DELIMITER) {
string[] tab_delimited_array = ParseLine(s, DEFAULT_TAB_DELIMITER, DEFAULT_QUALIFIER);
if (tab_delimited_array.Length > list.Count) {
array = tab_delimited_array;
return success;
}
}
// Return the array we parsed
array = list.ToArray();
return success;
}
You should note that, even as complicated as this algorithm is, it still is unable to parse CSV files where there are embedded newlines within a text qualified value, for example, this:
123,"Hi, I am a CSV File!
I am saying hello to you!
But I also have embedded newlines in my text.",2012-07-23
To solve those, I have a multiline parser that keeps appending further lines of text and re-running TryParseLine() until the accumulated text parses successfully:
/// <summary>
/// Parses one logical CSV record whose qualified values may span several physical lines.
/// Physical lines are read and accumulated until <c>TryParseLine</c> reports that every
/// opened text qualifier has been closed.
/// </summary>
/// <param name="sr">Reader positioned at the start of the record.</param>
/// <param name="delimiter">The character that separates fields.</param>
/// <param name="text_qualifier">The character that opens and closes a qualified field.</param>
/// <returns>
/// The parsed fields. If the stream ends before the record parses cleanly, the best-effort
/// result of the last attempt is returned; <c>null</c> if the reader was already at the end.
/// </returns>
public static string[] ParseMultiLine(StreamReader sr, char delimiter, char text_qualifier)
{
    StringBuilder sb = new StringBuilder();
    string[] array = null;
    bool firstLine = true;
    while (!sr.EndOfStream) {
        // ReadLine() strips the line terminator. Put one back between physical lines,
        // otherwise a value with embedded newlines is silently glued together.
        // The original terminator cannot be recovered, so the platform default is used.
        if (!firstLine) {
            sb.Append(System.Environment.NewLine);
        }
        firstLine = false;

        // Read in a line
        sb.Append(sr.ReadLine());

        // Does the accumulated text parse as a complete record?
        if (TryParseLine(sb.ToString(), delimiter, text_qualifier, out array)) {
            return array;
        }
    }

    // Fails to parse - return the best array we were able to get
    return array;
}
Since you don't know how many columns will be in csv file, you might need to test for length:
if (columns.Length == 8) {
String prnout = columns[0];
String tinout = columns[1];
...
}
I bet you just got an empty line (extra EOL at the end), and that's as simple as that
Related
I am trying to write a parser that reads the third to last element from a list. This is a number or nothing. This is my code:
// Attempts to return the third-to-last comma-separated element of the file as an int.
public int parseLastId(String filepath)
{
List<string> lines = new List<string>();
// NOTE: File.ReadAllLines returns a string[]; calling ToString() on an array yields
// the type name ("System.String[]"), not the contents of the file.
String text = File.ReadAllLines(filepath).ToString();
lines = text.Split(',').ToList();
// NOTE: no check that the list holds at least 3 elements before indexing Count - 3.
int id = Convert.ToInt32(lines[lines.Count - 3]);
return id;
}
the text in the file is like this:
1,Joe,2,Jeff,
File.ReadAllLines will read every line from a text file.
In your text file, you only have 1 line.
So you can change
String text = File.ReadAllLines(filepath).ToString();
to
// Get the first line from the text file.
String text = File.ReadAllLines(filepath)[0];
The rest of your program is fine then.
Idiomatic C#
Just to note, that your code isn't written in standard C# style.
In particular:
string should be used instead of String.
Method names should start with a capital letter
The call to .ToList() on text.Split(',') is unnecessary.
Here is an edited version of your code that's more in line with typical C# standards - hope that helps :)
/// <summary>
/// Reads the first line of a file and returns its third-to-last
/// comma-separated element converted to an integer.
/// </summary>
/// <param name="filepath">Path of the text file to read.</param>
/// <returns>The integer value of the third-to-last element.</returns>
public int ParseLastId(string filepath)
{
    string firstLine = File.ReadAllLines(filepath)[0];
    string[] fields = firstLine.Split(',');
    int thirdFromEnd = fields.Length - 3;
    return Convert.ToInt32(fields[thirdFromEnd]);
}
Error handling
You state that you want to return nothing if the program cannot parse the entry. To do that, you will want to return a "nullable int" (int?) from your method, and use the int.TryParse method.
For example:
/// <summary>
/// Reads the first line of a file and tries to interpret its third-to-last
/// comma-separated element as an integer.
/// </summary>
/// <param name="filepath">Path of the text file to read.</param>
/// <returns>
/// The parsed id, or <c>null</c> when the file is empty, the first line has fewer
/// than three elements, or the element is not a valid integer.
/// </returns>
public int? ParseLastId(string filepath)
{
    var allLines = File.ReadAllLines(filepath);

    // An empty file has no first line to inspect.
    if (allLines.Length == 0)
    {
        return null;
    }

    var lines = allLines[0].Split(',');

    // Without at least three elements there is no "third to last" one.
    if (lines.Length < 3)
    {
        return null;
    }

    if (int.TryParse(lines[lines.Length - 3], out var id))
    {
        return id;
    }

    return null;
}
I assume by "return nothing" you mean 0
/// <summary>
/// Splits the whole file content on commas and returns the third-to-last
/// element as an integer, falling back to 0 when that is not possible.
/// </summary>
/// <param name="filepath">Path of the text file to read.</param>
/// <returns>
/// The parsed value, or 0 when there are fewer than three elements or the
/// element is not a valid integer.
/// </returns>
public int parseLastId(string filepath)
{
    string[] fields = File.ReadAllText(filepath).Split(',');
    if (fields.Length < 3)
    {
        return 0;
    }

    int parsed;
    if (int.TryParse(fields[fields.Length - 3], out parsed))
    {
        return parsed;
    }

    return 0;
}
I try to get a file's content into an array in C#.
My program is similar to a shopping list.
I'm not able to store the data as a JSON file cause of some requirements.
I decided to store the data like:
3x water
2x fish
I grabbed the data using a simple StreamReader and got all the data into a string.
I tried to separate the string by \n using split and store the data into a string array.
Now I wanted to split each string again to get the quantity of the product.
I tried splitting each index using a foreach and storing them in a second array. I also tried to store the data in the same array.
I want to get something like
string[] cars = {"3", "water", "2", "fish"};
or store the data in a list.
Is my attempt a bad one?
Is there a way to do it much more easily?
Easy is a very subjective requirement.
You could write it in one statement with a variant of Split that restricts it to splitting at most once (so you can use an x in a description), and uses SelectMany to fold down all the lines into a single array of strings - as you asked.
string[] cars = File.ReadAllLines("input.txt")
.SelectMany(line => line.Split(new char[] { 'x' }, 2))
.Select(s => s.Trim())
.ToArray();
...but I wouldn't.
I think that maintainability is important. This code is understandable, useful, and obvious.
using System;
using System.Collections.Generic;
using System.IO;
/// <summary>
/// Console program that loads a shopping list from "input.txt", where each
/// non-blank line has the form "&lt;quantity&gt;x &lt;description&gt;".
/// </summary>
class Program
{
    /// <summary>A single list entry: how many, and of what.</summary>
    public class Car
    {
        private int quantity;
        private string description;

        public Car(int quantity, string description)
        {
            this.quantity = quantity;
            this.description = description;
        }

        /// <summary>Formats the entry the same way it appears in the input file.</summary>
        public override string ToString()
        {
            return quantity + "x " + description;
        }
    }

    /// <summary>
    /// Parses every line of "input.txt" into a <see cref="Car"/> and collects the results.
    /// Throws FormatException or InvalidDataException on the first malformed line.
    /// </summary>
    static void Main(string[] args)
    {
        var cars = new List<Car>();

        foreach (string rawLine in File.ReadAllLines("input.txt"))
        {
            // Blank lines carry no entry.
            if (string.IsNullOrWhiteSpace(rawLine))
            {
                continue;
            }

            // The first 'x' separates the quantity from the description.
            int delimiterIndex = rawLine.IndexOf('x');
            if (delimiterIndex < 0)
            {
                throw new FormatException("The line is in an invalid format because it does not contain an 'x'.");
            }

            // Everything before the delimiter must be a positive integer.
            int quantity;
            bool isNumber = int.TryParse(rawLine.Substring(0, delimiterIndex), out quantity);
            if (!isNumber)
            {
                throw new FormatException("The quantity is not a number.");
            }
            if (quantity < 1)
            {
                throw new InvalidDataException("The quantity is not a positive number.");
            }

            // Everything after the delimiter, trimmed, is the description and must not be empty.
            string description = rawLine.Substring(delimiterIndex + 1).Trim();
            if (description.Length == 0)
            {
                throw new InvalidDataException("The description is missing.");
            }

            cars.Add(new Car(quantity, description));
        }
    }
}
If we suppose that each item has its own line inside the file, this solution goes through each line, separates the quantity from the name of the item, and stores both in an array.
/// <summary>
/// Reads "source.in", where each non-blank line has the form "&lt;quantity&gt;x &lt;item&gt;",
/// and flattens it into an array of alternating quantity and item strings,
/// e.g. { "3", "water", "2", "fish" }.
/// </summary>
/// <returns>Quantity/item pairs in file order; an empty array for an empty file.</returns>
/// <exception cref="System.FormatException">A non-blank line contains no 'x' delimiter.</exception>
private static string[] GetData()
{
    // A list avoids reading the file a second time just to size the array.
    var data = new System.Collections.Generic.List<string>();

    foreach (string line in File.ReadLines("source.in"))
    {
        // Blank lines (such as a trailing newline) carry no item.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        // The first 'x' separates the quantity from the item name.
        int index = line.IndexOf('x');
        if (index < 0)
        {
            throw new System.FormatException("The line '" + line + "' does not contain an 'x' delimiter.");
        }

        // Trim instead of skipping a fixed character, so both "3x water" and "3xwater" work.
        data.Add(line.Substring(0, index).Trim());
        data.Add(line.Substring(index + 1).Trim());
    }

    return data.ToArray();
}
I need to create a function that receives two strings, representing the word to be completed and the reference word, as well as a character corresponding to the proposed letter, and returns a string corresponding to the word to be completed in which all occurrences of the proposed letter have been added, relative to the reference word.
Example: CompleterMot("..IM..", "ANIMAL", 'A') should return "A.IMA.".
I don't understand how I can add all occurrences of the letter in the word that will be completed.
// Attempt at revealing every occurrence of lettrepropos (taken from motoriginal)
// inside the partially completed word motincomplet.
static string CompleterMot(string motincomplet, string motoriginal, char lettrepropos)
{
string output = " ";
for (int i = 0; i < motoriginal.Length; i++)
{
if((motoriginal[i] == lettrepropos))
{
// NOTE: output is reset to motincomplet on every match, discarding earlier changes.
output = motincomplet;
if(output[i] != lettrepropos)
// NOTE: += appends the letter at the end of the string; it does not place it at index i.
output += (char)(lettrepropos);
}
}
return output;
}
In the end I got ..IM..A and I don't know how to fix my code.
In your loop you do `output = motincomplet;`, which overwrites the previous result. Then you append the expected letter to the end of the output, which gives "..IM.." + 'A' as the result.
You can use a StringBuilder for string manipulation, that's pretty nice and allow you to directly change a character at a given index :
/// <summary>
/// Reveals every occurrence of a proposed letter in a partially completed word.
/// </summary>
/// <param name="motincomplet">The word being completed, with placeholders for unknown letters.</param>
/// <param name="motoriginal">The reference word.</param>
/// <param name="lettrepropos">The proposed letter.</param>
/// <returns>
/// A copy of <paramref name="motincomplet"/> in which each position where
/// <paramref name="motoriginal"/> holds the proposed letter has been set to that letter.
/// </returns>
static string CompleterMot(string motincomplet, string motoriginal, char lettrepropos)
{
    var sb = new System.Text.StringBuilder(motincomplet);

    // Only positions present in both words can be filled in. Bounding the loop
    // avoids an out-of-range write when motincomplet is shorter than motoriginal.
    int limit = System.Math.Min(sb.Length, motoriginal.Length);

    for (int i = 0; i < limit; i++)
    {
        if (motoriginal[i] == lettrepropos)
        {
            sb[i] = lettrepropos;
        }
    }

    return sb.ToString();
}
I've tried a few different methods and none of them work correctly so I'm just looking for someone to straight out show me how to do it . I want my application to read in a file based on an OpenFileDialog.
When the file is read in I want to go through it and and run this function which uses Linq to insert the data into my DB.
objSqlCommands.sqlCommandInsertorUpdate
However, I want to go through the string, counting the number of ","s found. When the number reaches four, I want to take only the characters encountered until the next ",", and keep doing this until the end of the file. Can someone show me how to do this?
Based on the answers given here my code now looks like this
// Reads the selected file as one comma-separated string (newlines become commas),
// collects the field that follows every fourth comma, and inserts each collected field.
string fileText = File.ReadAllText(ofd.FileName).Replace(Environment.NewLine, ",");
int counter = 0;
int idx = 0;
List<string> foo = new List<string>();
foreach (char c in fileText.ToArray())
{
// idx is incremented first, so it points at the character after c.
idx++;
if (c == ',')
{
counter++;
}
if (counter == 4)
{
string x = fileText.Substring(idx);
// NOTE: when no further ',' follows, x.IndexOf(',') returns -1 and
// Substring(idx, -1) throws ("Length cannot be less than zero").
foo.Add(fileText.Substring(idx, x.IndexOf(',')));
counter = 0;
}
}
foreach (string s in foo)
{
objSqlCommands.sqlCommandInsertorUpdate("INSERT", s);//laClient[0]);
}
However, I am getting a "length cannot be less than 0" error on the foo.Add call. Any ideas?
A Somewhat hacky example. You would pass this the entire text from your file as a single string.
// Demo: collect the field that follows every fourth comma counted, then print the fields.
string str = "1,2,3,4,i am some text,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20";
int counter = 0;
List<string> foo = new List<string>();

for (int idx = 0; idx < str.Length; idx++)
{
    if (str[idx] != ',')
    {
        continue;
    }

    counter++;
    if (counter < 4)
    {
        continue;
    }

    // The wanted field starts right after the fourth comma and runs to the next
    // comma. If there is no further comma it runs to the end of the string;
    // without this check Substring is given a length of -1 and throws.
    int start = idx + 1;
    int end = str.IndexOf(',', start);
    foo.Add(end < 0 ? str.Substring(start) : str.Substring(start, end - start));

    // Start counting again from this comma.
    counter = 0;
}

foreach (string s in foo)
{
    Console.WriteLine(s);
}
Console.Read();
Prints:
i am some text
9
13
17
As Raidri indicates in his answer, String.Split is definitely your friend. To catch every fifth word, you could try something like this (not tested):
// Read the selected file and treat line breaks as additional field separators.
string fileText = File.ReadAllText(OpenDialog.FileName).Replace(Environment.NewLine, ",");
string[] words = fileText.Split(',');
List<string> everyFifthWord = new List<string>();

// Index 4 is the fifth word; step by 5 to pick up every fifth word after it.
for (int i = 4; i < words.Length; i += 5)
{
    everyFifthWord.Add(words[i]);
}
The above code reads the selected file from the OpenFileDialog, then replaces every newline with a ",". Then it splits the string on ",", and starting with the fifth word takes every fifth word in the string and adds it to the list.
File.ReadAllText reads a text file to a string and Split turns that string into an array seperated at the commas:
File.ReadAllText(OpenDialog.FileName).Split(',')[4]
If you have more than one line use:
File.ReadAllLines(OpenDialog.FileName).Select(l => l.Split(',')[4])
This gives an IEnumerable<string> where each string contains the wanted part from one line of the file
It's not clear to me if you're after every fifth piece of text between the commas or if there are multiple lines and you want only the fifth piece of text on each line. So I've done both.
Every fifth piece of text:
var text = "1,2,3,4,i am some text,6,7,8,9"
+ ",10,11,12,13,14,15,16,17,18,19,20";
var everyFifth =
text
.Split(',')
.Where((x, n) => n % 5 == 4);
Only the fifth piece of text on each line:
var lines = new []
{
"1,2,3,4,i am some text,6,7",
"8,9,10,11,12,13,14,15",
"16,17,18,19,20",
};
var fifthOnEachLine =
lines
.Select(x => x.Split(',')[4]);
I love using @"strings" in C#, especially when I have a lot of multi-line text. The only annoyance is that my code formatting gets ugly when I do this, because the second and later lines are pushed fully to the left instead of following the indentation of my beautifully formatted code. I know this is by design, but is there some option or hack that allows these lines to be indented without adding the actual tabs/spaces to the output?
adding example:
var MyString = #" this is
a multi-line string
in c#.";
My variable declaration is indented to the "correct" depth, but the second and further lines in the string get pushed to the left margin- so the code is kinda ugly. You could add tabs to the start of line 2 and 3, but the string itself would then contain those tabs... make sense?
How about a string extension? Update: I reread your question and I hope there is a better answer. This is something that bugs me too and having to solve it as below is frustrating but on the plus side it does work.
using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
    /// <summary>
    /// String helpers for tidying multi-line verbatim literals.
    /// </summary>
    public static class StringExtensions
    {
        // Matches the run of spaces and tabs at the start of every line.
        // [ \t] is used instead of \s because \s also matches line breaks, which
        // would make a match at a blank line swallow that line entirely.
        // Built once and reused rather than constructed on every call.
        private static readonly Regex LeadingWhitespace =
            new Regex(@"^[ \t]+", RegexOptions.Multiline);

        /// <summary>
        /// Removes the leading spaces and tabs from every line of <paramref name="s"/>.
        /// Line breaks, including blank lines, are preserved.
        /// </summary>
        /// <param name="s">The (typically multi-line) text to clean up.</param>
        /// <returns>The text with each line's leading indentation removed.</returns>
        public static string StripLeadingWhitespace(this string s)
        {
            return LeadingWhitespace.Replace(s, string.Empty);
        }
    }
}
And an example console program:
using System;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo that prints a multi-line verbatim string before and after
    /// StripLeadingWhitespace() is applied to it.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Every character between the quotes of a verbatim literal, including any
            // indentation on the continuation lines, is part of the string.
            string x = @"This is a test
of the emergency
broadcasting system.";

            // The string exactly as written in the source.
            Console.WriteLine(x);
            Console.WriteLine();
            Console.WriteLine("---");
            Console.WriteLine();

            // The same string with each line's leading whitespace removed.
            Console.WriteLine(x.StripLeadingWhitespace());

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
And the output:
This is a test
of the emergency
broadcasting system.
---
This is a test
of the emergency
broadcasting system.
And a cleaner way to use it if you decide to go this route:
string x = #"This is a test
of the emergency
broadcasting system.".StripLeadingWhitespace();
// consider renaming extension to say TrimIndent() or similar if used this way
Cymen has given the right solution. I use a similar approach as derived from Scala's stripMargin() method. Here's what my extension method looks like:
/// <summary>
/// Removes the margin from every line of a multi-line string, in the spirit of
/// Scala's <c>stripMargin</c>: optional leading spaces/tabs followed by a '|'
/// at the start of a line are dropped.
/// </summary>
/// <param name="s">The text whose lines start with a '|' margin marker.</param>
/// <returns>The text with each line's margin removed.</returns>
public static string StripMargin(this string s)
{
    // Anchoring with ^ (Multiline) restricts the match to the start of each line,
    // so a '|' preceded by whitespace in the middle of a line is left alone.
    // [ \t]* also accepts a margin marker that sits in column 0.
    return Regex.Replace(s, @"^[ \t]*\|", string.Empty, RegexOptions.Multiline);
}
Usage:
var mystring = #"
|SELECT
| *
|FROM
| SomeTable
|WHERE
| SomeColumn IS NOT NULL"
.StripMargin();
Result:
SELECT
*
FROM
SomeTable
WHERE
SomeColumn IS NOT NULL
I can't think of an answer that would completely satisfy your question, however you could write a function that strips leading spaces from lines of text contained in a string and call it on each creation of such a string.
var myString = TrimLeadingSpacesOfLines(#" this is a
a multi-line string
in c#.");
Yes it is a hack, but you specified your acceptance of a hack in your question.
Here is a longish solution which tries to mimic textwrap.dedent as much as possible. The first line is left as-is and expected not to be indented. (You can generate the unit tests based on the doctests using doctest-csharp.)
/// <summary>
/// Imitates Python's
/// <a href="https://docs.python.org/3/library/textwrap.html#textwrap.dedent">
/// <c>textwrap.dedent</c></a>: removes the indentation shared by all non-empty
/// lines after the first one. The first line is never modified, and nothing is
/// removed unless at least two non-empty lines follow it.
/// </summary>
/// <param name="text">Text to be dedented</param>
/// <returns>array of dedented lines</returns>
/// <code doctest="true">
/// Assert.That(Dedent(""), Is.EquivalentTo(new[] {""}));
/// Assert.That(Dedent("test me"), Is.EquivalentTo(new[] {"test me"}));
/// Assert.That(Dedent("test\nme"), Is.EquivalentTo(new[] {"test", "me"}));
/// Assert.That(Dedent("test\n me"), Is.EquivalentTo(new[] {"test", " me"}));
/// Assert.That(Dedent("test\n  me\n   again"), Is.EquivalentTo(new[] {"test", "me", " again"}));
/// Assert.That(Dedent(" test\n  me\n   again"), Is.EquivalentTo(new[] {" test", "me", " again"}));
/// </code>
private static string[] Dedent(string text)
{
    string[] rows = text.Split(
        new[] {"\r\n", "\r", "\n"},
        StringSplitOptions.None);

    // Find the first non-empty row after row 0 (the reference row) and check
    // that one more non-empty row follows it. With fewer than two such rows
    // the indent cannot be pinned down, so the rows are returned untouched.
    int referenceRow = -1;
    int nonEmptySeen = 0;
    for (int r = 1; r < rows.Length && nonEmptySeen < 2; r++)
    {
        if (rows[r].Length == 0) continue;
        if (nonEmptySeen == 0) referenceRow = r;
        nonEmptySeen++;
    }
    if (nonEmptySeen < 2) return rows;

    // Grow the shared prefix one column at a time: the reference row must hold
    // a space or tab there, and every later non-empty row must hold the very
    // same character in that column.
    string reference = rows[referenceRow];
    int sharedLength = 0;
    while (sharedLength < reference.Length)
    {
        char indentChar = reference[sharedLength];
        if (indentChar != ' ' && indentChar != '\t') break;

        bool allRowsAgree = true;
        for (int r = referenceRow + 1; r < rows.Length && allRowsAgree; r++)
        {
            string row = rows[r];
            if (row.Length == 0) continue;
            allRowsAgree = row.Length > sharedLength && row[sharedLength] == indentChar;
        }
        if (!allRowsAgree) break;

        sharedLength++;
    }
    if (sharedLength == 0) return rows;

    // Cut the shared prefix from every non-empty row except the first one.
    for (int r = 1; r < rows.Length; r++)
    {
        if (rows[r].Length != 0) rows[r] = rows[r].Substring(sharedLength);
    }
    return rows;
}