This is a take on some experimental code that Tim Schmelter pointed me in the right direction on earlier this afternoon. The majority of it is almost exactly the same as what worked earlier, but it is throwing an InvalidCastException on the last line or the second-to-last line, depending on which one I try. I cannot see why this is.
Boolean test = false;
string filePathStudent = System.IO.Path.GetFullPath("StudentInfo.txt");
DataTable studentDataTable = new DataTable();
studentDataTable.Columns.Add("Id", typeof(int));
studentDataTable.Columns.Add("StudentID");
studentDataTable.Columns.Add("FirstName");
studentDataTable.Columns.Add("LastName");
studentDataTable.Columns.Add("StreetAdd");
studentDataTable.Columns.Add("City");
studentDataTable.Columns.Add("State");
studentDataTable.Columns.Add("Zip");
studentDataTable.Columns.Add("Choice1");
studentDataTable.Columns.Add("CreditHrs1");
studentDataTable.Columns.Add("Choice2");
studentDataTable.Columns.Add("CreditHrs2");
studentDataTable.Columns.Add("Choice3");
studentDataTable.Columns.Add("CreditHrs3");
studentDataTable.Columns.Add("Choice4");
studentDataTable.Columns.Add("CreditHrs4");
studentDataTable.Columns.Add("Choice5");
studentDataTable.Columns.Add("CreditHrs5");
studentDataTable.Columns.Add("Choice6");
studentDataTable.Columns.Add("CreditHrs6");
foreach (string line in File.ReadLines(filePathStudent))
{
DataRow row = studentDataTable.Rows.Add();
string[] fields = line.Split(new[] { (char)9 });
int id;
if (fields.Length == 19 && int.TryParse(fields[0], out id))
{
row.SetField("Id", id);
row.SetField("StudentID", fields[1]);
row.SetField("FirstName", fields[2]);
row.SetField("LastName", fields[3]);
row.SetField("StreetAdd", fields[4]);
row.SetField("City", fields[5]);
row.SetField("State", fields[6]);
row.SetField("Zip", fields[7]);
row.SetField("Choice1", fields[8]);
row.SetField("CreditHrs1", fields[9]);
row.SetField("Choice2", fields[10]);
row.SetField("CreditHrs2", fields[11]);
row.SetField("Choice3", fields[12]);
row.SetField("CreditHrs3", fields[13]);
row.SetField("Choice4", fields[14]);
row.SetField("CreditHrs4", fields[15]);
row.SetField("Choice5", fields[16]);
row.SetField("CreditHrs5", fields[17]);
row.SetField("Choice6", fields[18]);
row.SetField("CreditHrs6", fields[19]);
}
}
using (StreamReader reader = new StreamReader(filePathStudent))
{
String line1 = reader.ReadLine();
if (line1 == null)
maxIDStdTable = 0;
else
test = true;
reader.Dispose();
reader.Close();
}
if(test)
int maxIDStdTable = studentDataTable.AsEnumerable().Max(r => r.Field<int>("Id"));
//int maxIDStdTable = (int)studentDataTable.Compute("Max(Id)", "");
You have made two mistakes:
1) You have to create the new DataRow with DataTable.NewRow().
2) After setting the DataRow's values, you have to add it to the DataTable with DataTable.Rows.Add(yourDataRow).
Update your code as follows and try it:
foreach (string line in File.ReadLines(filePathStudent))
{
DataRow row = studentDataTable.NewRow();
string[] fields = line.Split(new[] { (char)9 });
int id;
if (fields.Length == 19 && int.TryParse(fields[0], out id))
{
row["Id"] = id;
row["StudentID"] = fields[1];
row["FirstName"] = fields[2];
row["LastName"] = fields[3];
row["StreetAdd"] = fields[4];
row["City"] = fields[5];
row["State"] = fields[6];
row["Zip"] = fields[7];
row["Choice1"] = fields[8];
row["CreditHrs1"] = fields[9];
row["Choice2"] = fields[10];
row["CreditHrs2"] = fields[11];
row["Choice3"] = fields[12];
row["CreditHrs3"] = fields[13];
row["Choice4"] = fields[14];
row["CreditHrs4"] = fields[15];
row["Choice5"] = fields[16];
row["CreditHrs5"] = fields[17];
row["Choice6"] = fields[18];
row["CreditHrs6"] = fields[19];
}
studentDataTable.Rows.Add(row);
}
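Once the rows are added this way, the empty-file check with the StreamReader is no longer needed either; a minimal sketch of getting the max Id in one expression (assuming the Id column stays typed as int, as in your code):
int maxIDStdTable = studentDataTable.AsEnumerable()
    .Where(r => !r.IsNull("Id"))
    .Select(r => r.Field<int>("Id"))
    .DefaultIfEmpty(0)
    .Max();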
It might not be the best solution, but it works.
string filePathStudent = System.IO.Path.GetFullPath("StudentInfo.txt");
DataTable studentDataTable = new DataTable();
studentDataTable.Columns.Add("Id", typeof(Int32));
studentDataTable.Columns.Add("StudentID");
studentDataTable.Columns.Add("FirstName");
studentDataTable.Columns.Add("LastName");
studentDataTable.Columns.Add("StreetAdd");
studentDataTable.Columns.Add("City");
studentDataTable.Columns.Add("State");
studentDataTable.Columns.Add("Zip");
studentDataTable.Columns.Add("Choice1");
studentDataTable.Columns.Add("CreditHrs1");
studentDataTable.Columns.Add("Choice2");
studentDataTable.Columns.Add("CreditHrs2");
studentDataTable.Columns.Add("Choice3");
studentDataTable.Columns.Add("CreditHrs3");
studentDataTable.Columns.Add("Choice4");
studentDataTable.Columns.Add("CreditHrs4");
studentDataTable.Columns.Add("Choice5");
studentDataTable.Columns.Add("CreditHrs5");
studentDataTable.Columns.Add("Choice6");
studentDataTable.Columns.Add("CreditHrs6");
// Read in a file line-by-line, and store it
var txtFileLine = File.ReadAllLines(filePathStudent).ToList();
//Reads each line and splits the data into columns at the tab character (ASCII value 9)
txtFileLine.ForEach(line => studentDataTable.Rows.Add(line.Split((char)9)));
List<int> rowsForColumn1 = studentDataTable.AsEnumerable().Select(x => x.Field<int>(0)).ToList();
//Tests for empty Datatable
foreach (DataRow row in studentDataTable.Rows)
{
if (row.IsNull("Id"))
break;
else
//get max value from "Id" row.
maxIDStdTable = rowsForColumn1.Max();
}
}
I am taking an Excel file, reading the fields, and passing them to a class so I can insert them into the DB. When I call dt.AsEnumerable, it gives me an error on the "Linea" field. This cell sometimes comes with a $ sign, and I think that is what is generating the error, so I am trying to replace the character and then convert it to int, since it is an amount field.
using (var streamExcel = System.IO.File.Create(combineFilePath))
{
await file.CopyToAsync(streamExcel);
}
using (var excelWorkbook = new XLWorkbook(combineFilePath))
{
IXLWorksheet workSheet = excelWorkbook.Worksheet(2);
workSheet.Clear(XLClearOptions.AllFormats);
DataTable dt = new DataTable();
bool firstRow = true;
foreach (IXLRow row in workSheet.Rows())
{
//for row number check
if (firstRow)
{
foreach (IXLCell cell in row.Cells())
{
dt.Columns.Add(cell.Value.ToString());
}
firstRow = false;
}
else
{
//Add rows to DataTable.
dt.Rows.Add();
int i = 0;
foreach (IXLCell cell in row.Cells(1, 50))
{
if (cell.Address.ColumnNumber == 11)
{
workSheet.Cell(11, 11).Style.NumberFormat.Format = "#,##0";
cell.SetDataType(XLDataType.Number);
}
dt.Rows[dt.Rows.Count - 1][i] = cell.CachedValue.ToString();
i++;
}
}
}
try
{
var companys = dt.AsEnumerable().Select(row => new Company
{
Name = row.Field<string>("Nombre"),
Rut = row.Field<string>("Rut"),
Address = row.Field<string>("Dirección"),
AddressNumber = row.Field<string>("Nº"),
Location = row.Field<string>("Comuna"),
Region = row.Field<string>("Región"),
Giro = row.Field<string>("Giro Cliente"),
Linea = Convert.ToInt32(row.Field<string>("Monto línea Aprobada").Trim().TrimEnd().Replace("$", "")),
Observations = row.Field<string>("Observaciones Comité"),
}).ToList();
UserId = _companyService.AddList(companys);
}
catch (Exception e)
{
}
To visualize where it's failing, you could do something like this:
try{
var companysB = dt.AsEnumerable().Select(row => new
{
Name = row.Field<string>("Nombre"),
LineaRaw = row.Field<string>("Monto línea Aprobada"),
LineaProcessed = row.Field<string>("Monto línea Aprobada").Trim().TrimEnd().Replace("$", ""),
})
.ToList();
}
catch { }
Put a breakpoint on 'companysB' and inspect after it's filled. One or more of the LineaRaw / LineaProcessed will not be a number.
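If you'd rather avoid the exception altogether, the conversion can also be made defensive; a minimal sketch (ParseLinea is a hypothetical helper name) that treats blank or non-numeric cells as 0 instead of letting Convert.ToInt32 throw:
static int ParseLinea(string raw)
{
    // Strip the currency sign and whitespace, then try to parse;
    // fall back to 0 for blank or non-numeric cells.
    var cleaned = (raw ?? string.Empty).Replace("$", "").Trim();
    return int.TryParse(cleaned,
        System.Globalization.NumberStyles.AllowThousands,
        System.Globalization.CultureInfo.CurrentCulture,
        out var value) ? value : 0;
}
// usage inside the Select:
// Linea = ParseLinea(row.Field<string>("Monto línea Aprobada")),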
The error occurred when the "Monto línea Aprobada" cell had a blank value. To fix it, I did this:
var n = 0;
string cellEmpty = "";
foreach (DataRow rowEmpty in dt.Rows)
{
cellEmpty = rowEmpty["Monto línea Aprobada"].ToString();
if (string.IsNullOrEmpty(cellEmpty))
{
cellEmpty = "0";
dt.Rows[n].SetField("Monto línea Aprobada", cellEmpty);
}
n++;
}
I have a large csv file which has millions of rows. The sample csv lines are
CODE,COMPANY NAME, DATE, ACTION
A,My Name , LLC,2018-01-28,BUY
B,Your Name , LLC,2018-01-25,SELL
C,
All Name , LLC,2018-01-21,SELL
D,World Name , LLC,2018-01-20,BUY
Row C contains a new line, but it is actually the same record. I want to remove the new line character from the csv line within the cell\field\column.
I tried \r\n, Environment.NewLine and many other things, but could not make it work.
Here is my code:
private DataTable CSToDataTable(string csvfile)
{
Int64 row = 0;
try
{
string CSVFilePathName = csvfile; //#"C:\test.csv";
string[] Lines = File.ReadAllLines(CSVFilePathName.Replace(Environment.NewLine, ""));
string[] Fields;
Fields = Lines[0].Split(new char[] { ',' });
int Cols = Fields.GetLength(0);
DataTable dt = new DataTable();
//1st row must be column names; force lower case to ensure matching later on.
for (int i = 0; i < Cols; i++)
dt.Columns.Add(Fields[i].ToLower(), typeof(string));
DataRow Row;
for (row = 1; row < Lines.GetLength(0); row++)
{
Fields = Lines[row].Split(new char[] { ',' });
Row = dt.NewRow();
//Console.WriteLine(row);
for (int f = 0; f < Cols; f++)
{
Row[f] = Fields[f];
}
dt.Rows.Add(Row);
if (row == 190063)
{
}
}
return dt;
}
catch (Exception ex)
{
throw ex;
}
}
How can I remove the new line character and read the row correctly? I don't want to skip such rows, as per the business requirement.
Your CSV file is not in a valid format. In order to parse and load it successfully, you will have to sanitize it. There are a couple of issues:
The COMPANY NAME column contains the field separator in it. Fix this by surrounding the value with quotes.
New line inside a CSV value - this can be fixed by combining the adjacent rows into one.
With Cinchoo ETL, you can sanitize and load your large file as below
string csv = #"CODE,COMPANY NAME, DATE, ACTION
A,My Name , LLC,2018-01-28,BUY
B,Your Name , LLC,2018-01-25,SELL
C,
All Name , LLC,2018-01-21,SELL
D,World Name , LLC,2018-01-20,BUY";
string bufferLine = null;
var reader = ChoCSVReader.LoadText(csv)
.WithFirstLineHeader()
.Setup(s => s.BeforeRecordLoad += (o, e) =>
{
string line = (string)e.Source;
string[] tokens = line.Split(",");
if (tokens.Length == 5)
{
//Fix the second and third value with quotes
e.Source = #"{0},""{1},{2}"",{3}, {4}".FormatString(tokens[0], tokens[1], tokens[2], tokens[3], tokens[4]);
}
else
{
//Fix the breaking lines, assume that some csv lines broken into max 2 lines
if (bufferLine == null)
{
bufferLine = line;
e.Skip = true;
}
else
{
line = bufferLine + line;
tokens = line.Split(",");
e.Source = #"{0},""{1},{2}"",{3}, {4}".FormatString(tokens[0], tokens[1], tokens[2], tokens[3], tokens[4]);
bufferLine = null;
}
}
});
foreach (var rec in reader)
Console.WriteLine(rec.Dump());
//Careful to load millions rows into DataTable
//var dt = reader.AsDataTable();
Hope it helps.
You haven't made it clear under what conditions an unwanted new line can appear in the file. So, assuming that a 'proper' line in the CSV file does NOT end with a comma, and that a line ending with a comma means it is not a properly formatted line, you could do something like this:
static void Main(string[] args)
{
string path = #"CSVFile.csv";
List<CSVData> data = new List<CSVData>();
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
{
using (StreamReader sr = new StreamReader(fs))
{
sr.ReadLine(); // Header
while (!sr.EndOfStream)
{
var line = sr.ReadLine();
while (line.EndsWith(","))
{
line += sr.ReadLine();
}
var items = line.Split(new string[] { "," }, StringSplitOptions.None);
data.Add(new CSVData() { CODE = items[0], NAME = items[1], COMPANY = items[2], DATE = items[3], ACTION = items[4] });
}
}
}
Console.ReadLine();
}
public class CSVData
{
public string CODE { get; set; }
public string NAME { get; set; }
public string COMPANY { get; set; }
public string DATE { get; set; }
public string ACTION { get; set; }
}
Obviously there's a lot of error handling to be done here (for example, when creating a new CSVData object make sure your items contain all the data you want), but I think this is the start you need.
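For example, that guard might look something like this (just a sketch of the idea, not a complete solution):
var items = line.Split(new string[] { "," }, StringSplitOptions.None);
if (items.Length >= 5)
{
    // Only build the record when the split produced all five expected fields.
    data.Add(new CSVData() { CODE = items[0], NAME = items[1], COMPANY = items[2], DATE = items[3], ACTION = items[4] });
}
else
{
    // Collect or log the malformed line instead of hitting an IndexOutOfRangeException.
}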
I am using SmartXLSX to read Excel files successfully, but I'm facing a problem: my program is reading the column headers as a new row along with all the other rows from the spreadsheet. How should I update my program to skip the column headers and read only the data rows?
private void GetCompanies()
{
int count = 0;
Companies = new List<Company>();
string directory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
string path = Path.Combine(directory, "Toll-Companies.xlsx");
SmartXLS.WorkBook WB = new WorkBook();
WB.readXLSX(path);
DataTable dt = WB.ExportDataTable();
string CurrentType = string.Empty;
string CurrentCategory = string.Empty;
string Removerow = string.Empty;
string b,c;
//DataRow rowe = dt.Rows[0];
//dt.Rows.Remove(rowe);
//loop through each row
foreach (DataRow dr in dt.Rows)
{
//Get company name in column c from Excel
c = dr[2].ToString();
//Get value in column B from Excel
b = dr[0].ToString();
if (b.StartsWith("Type:"))
{
CurrentType = b.Substring(6).Trim();
}
if (b.StartsWith("Primary Specialty"))
{
CurrentCategory = b.Substring(20).Trim();
}
//if company name is empty then skip row
if (string.IsNullOrEmpty(c)) continue;
//string Type = dr[7].ToString();
//dt.Columns.Add(Type)
//string cmp_type = dr[0].ToString();
//if (string.IsNullOrEmpty(cmp_type) || cmp_type == "Type") continue;
var cmp = new Company();
cmp.company_type = CurrentType;
cmp.company_category = CurrentCategory;
cmp.name = dr[2].ToString();
Companies.Add(cmp);
count++;
}
This will get the result you are seeking. I am unsure if you were looking for a method in the SmartXLSX library.
foreach (DataRow dr in dt.Rows)
{
if (count++ == 0) continue; // skip the header row
Alternatively, you can use a for loop and skip the first row...
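For example, a sketch of that for-loop variant (assuming the header is always the first row of the exported DataTable):
for (int r = 1; r < dt.Rows.Count; r++) // start at 1 so the header row is never treated as data
{
    DataRow dr = dt.Rows[r];
    // ... same body as the original foreach ...
}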
I am currently making an ASP.NET MVC application for users to upload other users to a database from csv and Excel files.
While working on the csv-uploader method I have come across a problem where the TextFieldParser - csvReader - only reads the first column name and then tries to add it to every column in the DataTable, which gives the "DuplicateNameException in System.Data.dll".
I have tried to find a counter that helps against this, and I have also changed the delimiter type, which only resulted in everything staying in the first DataTable column instead, meaning that the delimiters are not the problem.
There is a breakpoint, so I could see that the colFields counter manages to find all the columns in the csv-file, though it only tries to add the first one to the DataTable.
When searching for the problem, the only thing I come across is deliberate code that tries to add the same column over and over, but I do not think I have anything like that.
The method just stops trying to add the next column and tries to add the same first one instead.
As a result there is the "DuplicateNameException" - can anyone see why that is?
Here is my code and it is placed in a controller:
[HttpPost]
public ActionResult Import(HttpPostedFileBase file)
{
DataTable csvData = new DataTable();
if (file != null && file.ContentLength > 0)
{
try
{
using (TextFieldParser csvReader = new TextFieldParser(file.InputStream))
{
//TODO: Create a list or similar for delimiter choices, if possible.
csvReader.SetDelimiters(new string[] { ";" });
csvReader.HasFieldsEnclosedInQuotes = false;
string[] colFields = csvReader.ReadFields();
foreach (string column in colFields)
{
//TODO: Current error: DuplicateNameException - it puts the Author title on two different columns and throws an exception when this is detected.
DataColumn Titel = new DataColumn(column);
Titel.AllowDBNull = true;
csvData.Columns.Add(Titel);
DataColumn FirstName = new DataColumn(column);
FirstName.AllowDBNull = true;
csvData.Columns.Add(FirstName);
DataColumn LastName = new DataColumn(column);
LastName.AllowDBNull = true;
csvData.Columns.Add(LastName);
DataColumn AbstrNum = new DataColumn(column);
AbstrNum.AllowDBNull = true;
csvData.Columns.Add(AbstrNum);
DataColumn PosterTitel = new DataColumn(column);
PosterTitel.AllowDBNull = true;
csvData.Columns.Add(PosterTitel);
DataColumn Workshop = new DataColumn(column);
Workshop.AllowDBNull = true;
csvData.Columns.Add(Workshop);
DataColumn Keywords = new DataColumn(column);
Keywords.AllowDBNull = true;
csvData.Columns.Add(Keywords);
DataColumn Institution = new DataColumn(column);
Institution.AllowDBNull = true;
csvData.Columns.Add(Institution);
DataColumn CollabEmail = new DataColumn(column);
CollabEmail.AllowDBNull = true;
csvData.Columns.Add(CollabEmail);
}
while (!csvReader.EndOfData)
{
string[] fieldData = csvReader.ReadFields();
//Making empty value as null
for (int i = 0; i < fieldData.Length; i++)
{
if (fieldData[i] == "")
{
fieldData[i] = null;
}
}
csvData.Rows.Add(fieldData);
}
}
//Still inside the try...
SqlConnectionStringBuilder cb = new SqlConnectionStringBuilder();
cb.DataSource="HELMER/SQLEXPRESS";
cb.InitialCatalog="TestDB";
cb.IntegratedSecurity=true;
SqlConnection cnn = new SqlConnection(cb.ConnectionString);
SqlCommand cmd = new SqlCommand("SELECT * FROM csvData", cnn);
cnn.Open();
SqlDataReader rdr=cmd.ExecuteReader();
SqlBulkCopy sbc= new SqlBulkCopy("server=.;database=TestDB;" + "Integrated Security=SSPI");
sbc.DestinationTableName = "Users";
sbc.WriteToServer(rdr);
sbc.Close();
rdr.Close();
cnn.Close();
}
catch (Exception ex)
{
}
}
return RedirectToAction("Index", "Home", new { Message = "The Import was a success" });
}
I have taken some source material from this site:
http://www.morgantechspace.com/2013/10/import-csv-file-into-sql-server-using.html
Also, I am still pretty new to this site (it has been about a week), so please let me know if there is anything I am doing wrong.
Try this:
foreach (string column in colFields)
{
DataColumn datecolumn = new DataColumn(column);
datecolumn.AllowDBNull = true;
csvData.Columns.Add(datecolumn);
}
Once you have created the columns from colFields, you can add rows to the created columns.
while (!csvReader.EndOfData)
{
string[] fieldData = csvReader.ReadFields();
//Making empty value as null
for (int i = 0; i < fieldData.Length; i++)
{
if (fieldData[i] == "")
{
fieldData[i] = null;
}
}
csvData.Rows.Add(fieldData); // Here rows are added to the created columns
}
}
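As a side note, once csvData is filled you can hand the in-memory DataTable straight to SqlBulkCopy instead of reading it back out of SQL Server first; a minimal sketch, reusing the connection string builder cb from the question:
using (SqlConnection cnn = new SqlConnection(cb.ConnectionString))
{
    cnn.Open();
    SqlBulkCopy sbc = new SqlBulkCopy(cnn);
    sbc.DestinationTableName = "Users";
    sbc.WriteToServer(csvData); // writes the parsed DataTable directly to the Users table
    sbc.Close();
}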
I'm using this code to parse the values and store them in a List. The first row has names, which are stored fine. But when storing values, only the second row is being saved. I'm not sure what edit I need to make so that it parses all the other rows as well.
Please see image and code below.
List<string> names = new List<string>(); // List to store Key names
List<string> values = new List<string>(); // List to store key values
using (StreamReader stream = new StreamReader(filePath))
{
names = stream.ReadLine().Split(',').ToList(); // Separate key names and store them in a List
values = stream.ReadLine().Split(',').ToList(); // Separate key values and store them in a list
}
See if something like this works better:
// List to store Key names
List<string> names = new List<string>();
// List to store key values
List<List<string>> values = new List<List<string>>();
using (StreamReader stream = new StreamReader(filePath))
{
if(!stream.EndOfStream)
{
// Separate key names and store them in a List
names = stream.ReadLine().Split(',').ToList();
}
while(!stream.EndOfStream)
{
// Separate key values and store them in a list
values.Add(stream.ReadLine().Split(',').ToList());
}
}
This changes your values list to be a list of lists of strings, so that each row will be a list of strings.
While this probably isn't the best way to parse a .csv, if your data is consistent and the file format is strongly consistent you can probably get away with doing it like this. As soon as you try this with odd values, quoted strings, strings with commas, etc., you'll need a different approach.
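For example, a sketch of one such approach using TextFieldParser from Microsoft.VisualBasic.FileIO, which understands quoted fields and embedded commas (reusing the names and values lists from above):
using (var parser = new Microsoft.VisualBasic.FileIO.TextFieldParser(filePath))
{
    parser.SetDelimiters(",");
    parser.HasFieldsEnclosedInQuotes = true;
    if (!parser.EndOfData)
        names = parser.ReadFields().ToList(); // header row
    while (!parser.EndOfData)
        values.Add(parser.ReadFields().ToList()); // one list per data row
}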
I have written the code for a grid view; you can change it to use a list. I think it will help.
protected void Button1_Click(object sender, EventArgs e)
{
if (FileUpload1.HasFile)
{
string s = FileUpload1.FileName.Trim();
if (s.EndsWith(".csv"))
{
FileUpload1.PostedFile.SaveAs(Server.MapPath("~/data/" + s));
string[] readText = File.ReadAllLines(Server.MapPath("~/data/" + s));
DataSet ds = new DataSet();
DataTable dt = new DataTable();
// Array.Sort(readText);
for (int i = 0; i < readText.Length; i++)
{
if (i == 0)
{
string str = readText[0];
string[] header = str.Split(',');
dt.TableName = "sal";
foreach (string k in header)
{
dt.Columns.Add(k);
}
}
else
{
DataRow dr = dt.NewRow();
string str1 = readText[i];
if (readText[i] == ",,,,")
{
break;
}
string[] rows = str1.Split(',');
if (dt.Columns.Count == rows.Length)
{
for (int z = 0; z < rows.Length; z++)
{
if (rows[z] == "")
{
rows[z] = null;
}
dr[z] = rows[z];
}
dt.Rows.Add(dr);
}
else
{
Label1.Text = "please select valid format";
}
}
}
//Iterate through the columns of the datatable to set the data bound field dynamically.
ds.Merge(dt);
Session["tasktable"] = dt;
foreach (DataColumn col in dt.Columns)
{
BoundField bf = new BoundField();
bf.DataField = col.ToString();
bf.HeaderText = col.ColumnName;
if (col.ToString() == "Task")
{
bf.SortExpression = col.ToString();
}
GridView1.Columns.Add(bf);
}
GridView1.DataSource = ds;
GridView1.DataBind();
}
else
{
Label1.Text = "please select a only csv format";
}
}
else
{
Label1.Text = "please select a file";
}
}
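Since the goal here is a list rather than a grid view, a minimal sketch of turning the parsed DataTable (dt above) into a plain list of row values - this relies on DataTable.AsEnumerable from System.Data.DataSetExtensions:
List<string[]> rows = dt.AsEnumerable()
    .Select(r => r.ItemArray.Select(v => Convert.ToString(v)).ToArray())
    .ToList();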