To download and convert csv file to Datatable - c#

I have a requirement where I need to download a CSV file from a given URL and then export the data from that file to a database.
Is there any way to download the file and convert it directly to a DataTable, rather than saving it to the hard disk and then reading it into a DataTable?
I hope my question makes sense.
Thanks in advance

You can use StreamReader class to download csv without the need to save it locally:
/// <summary>
/// Requests <paramref name="url"/> and returns the whole response body as text,
/// without writing anything to disk.
/// </summary>
/// <param name="url">Address of the CSV resource to request.</param>
/// <returns>The complete response body.</returns>
public string DownloadCSV(string url)
{
    HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
    // using blocks release the response and the reader even when reading fails part-way through.
    using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    using (StreamReader streamReader = new StreamReader(httpWebResponse.GetResponseStream()))
    {
        return streamReader.ReadToEnd();
    }
}

// Loads a delimited text file into a DataSet through the Jet OLE DB text driver.
// "..." is a placeholder for the full path of the CSV file.
string FileName = ...
// The data source is the file's directory; HDR=YES and FMT=Delimited are passed as extended properties.
OleDbConnection conn = new OleDbConnection
("Provider=Microsoft.Jet.OleDb.4.0; Data Source = " +
Path.GetDirectoryName(FileName) +
"; Extended Properties = \"Text;HDR=YES;FMT=Delimited\"");
conn.Open();
// The file name (without its directory) is used as the table name in the query.
OleDbDataAdapter adapter = new OleDbDataAdapter
("SELECT * FROM " + Path.GetFileName(FileName), conn);
DataSet ds = new DataSet("Temp");
adapter.Fill(ds);
conn.Close();

I have written five methods below.
They have been designed to take into account optional quote marks (e.g. " symbols) and to be as versatile as possible without using other libraries:
/// <summary>
/// Parses the CSV file at <paramref name="filePath"/> into a DataTable.
/// </summary>
/// <param name="filePath">Path handed to the TextFieldParser constructor.</param>
/// <param name="isHeadings">True to use the heading-aware helper, false to use the no-headings helper.</param>
/// <returns>The parsed table, or null when an exception was passed to HandleException.</returns>
public static DataTable GetDataTabletFromCSVFile(string filePath, bool isHeadings)
{
    DataTable table = null;
    try
    {
        using (TextFieldParser parser = new TextFieldParser(filePath))
        {
            table = isHeadings
                ? GetDataTableFromTextFieldParser(parser)
                : GetDataTableFromTextFieldParserNoHeadings(parser);
        }
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return table;
}
/// <summary>
/// Parses CSV text held in memory into a DataTable.
/// </summary>
/// <param name="csvBody">CSV content; it is written to an in-memory stream that the parser then reads.</param>
/// <param name="isHeadings">True to use the heading-aware helper, false to use the no-headings helper.</param>
/// <returns>The parsed table, or null when an exception was passed to HandleException.</returns>
public static DataTable GetDataTableFromCsvString(string csvBody, bool isHeadings)
{
    DataTable table = null;
    try
    {
        MemoryStream buffer = new MemoryStream();
        StreamWriter writer = new StreamWriter(buffer);
        writer.Write(csvBody);
        writer.Flush();
        // Rewind so the parser starts at the first byte that was written.
        buffer.Position = 0;
        using (TextFieldParser parser = new TextFieldParser(buffer))
        {
            table = isHeadings
                ? GetDataTableFromTextFieldParser(parser)
                : GetDataTableFromTextFieldParserNoHeadings(parser);
        }
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return table;
}
/// <summary>
/// Requests a CSV resource over HTTP and parses the response stream into a DataTable.
/// </summary>
/// <param name="url">Address passed to WebRequest.Create.</param>
/// <param name="isHeadings">True to use the heading-aware helper, false to use the no-headings helper.</param>
/// <returns>The parsed table, or null when an exception was passed to HandleException.</returns>
public static DataTable GetDataTableFromRemoteCsv(string url, bool isHeadings)
{
    DataTable table = null;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        HttpWebResponse response = (HttpWebResponse)request.GetResponse();
        StreamReader responseReader = new StreamReader(response.GetResponseStream());
        using (TextFieldParser parser = new TextFieldParser(responseReader))
        {
            table = isHeadings
                ? GetDataTableFromTextFieldParser(parser)
                : GetDataTableFromTextFieldParserNoHeadings(parser);
        }
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return table;
}
/// <summary>
/// Reads comma-delimited, optionally quoted records from the parser. The first record
/// supplies the column names; every later record becomes a row.
/// </summary>
/// <param name="textFieldParser">Parser positioned at the start of the data; this method configures its delimiter and quoting.</param>
/// <returns>The populated table, or null when an exception was passed to HandleException.</returns>
private static DataTable GetDataTableFromTextFieldParser(TextFieldParser textFieldParser)
{
    DataTable result = null;
    try
    {
        textFieldParser.SetDelimiters(new string[] { "," });
        textFieldParser.HasFieldsEnclosedInQuotes = true;

        string[] headings = textFieldParser.ReadFields();
        DataTable table = new DataTable();
        for (int c = 0; c < headings.Length; c++)
        {
            table.Columns.Add(new DataColumn(headings[c]) { AllowDBNull = true });
        }

        while (!textFieldParser.EndOfData)
        {
            string[] row = textFieldParser.ReadFields();
            for (int i = 0; i < row.Length; i++)
            {
                // Empty fields are stored as null rather than as empty strings.
                if (string.IsNullOrEmpty(row[i]))
                {
                    row[i] = null;
                }
            }
            table.Rows.Add(row);
        }
        result = table;
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return result;
}
/// <summary>
/// Reads comma-delimited, optionally quoted records from the parser when there is no heading row.
/// Columns are named "Column 0", "Column 1", ... from the width of the first record.
/// </summary>
/// <param name="textFieldParser">Parser positioned at the start of the data; this method configures its delimiter and quoting.</param>
/// <returns>The populated table, or null when an exception was passed to HandleException.</returns>
private static DataTable GetDataTableFromTextFieldParserNoHeadings(TextFieldParser textFieldParser)
{
    DataTable result = null;
    try
    {
        textFieldParser.SetDelimiters(new string[] { "," });
        textFieldParser.HasFieldsEnclosedInQuotes = true;

        DataTable table = new DataTable();
        bool columnsCreated = false;
        while (!textFieldParser.EndOfData)
        {
            string[] row = textFieldParser.ReadFields();
            if (!columnsCreated)
            {
                // The first record decides how many columns the table gets.
                int index = 0;
                while (index < row.Length)
                {
                    table.Columns.Add(new DataColumn("Column " + index) { AllowDBNull = true });
                    index++;
                }
                columnsCreated = true;
            }
            for (int i = 0; i < row.Length; i++)
            {
                // Empty fields are stored as null rather than as empty strings.
                if (string.IsNullOrEmpty(row[i]))
                {
                    row[i] = null;
                }
            }
            table.Rows.Add(row);
        }
        result = table;
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return result;
}
If, like me, you're saving from reporting services then you should use it like this:
// Out-parameters filled in by ServerReport.Render below.
Warning[] warnings;
string[] streamids;
string mimeType;
string encoding;
string filenameExtension;
// Render the report in "csv" format, decode the bytes as UTF-8 text, then parse it with a heading row.
byte[] bytes = rvMain.ServerReport.Render("csv", null, out mimeType, out encoding, out filenameExtension, out streamids, out warnings);
string CsvBody = System.Text.Encoding.UTF8.GetString(bytes);
DataTable dt = GetDataTableFromCsvString(CsvBody,true);
Otherwise, all you need do is:
// Parse CSV text that is already held in a string (CsvBody).
bool IsHeadings = true; //Does the data include a heading row?
DataTable dt = GetDataTableFromCsvString(CsvBody, IsHeadings);
Or to use directly from a csv file
bool IsHeadings = true; //Does the data include a heading row?
// The call must match the method's declared name exactly (C# identifiers are case-sensitive).
DataTable dt = GetDataTabletFromCSVFile(FilePath, IsHeadings);
Or to use a csv file that is stored remotely
bool IsHeadings = true; //Does the data include a heading row?
// The remote helper is declared as GetDataTableFromRemoteCsv (no "t" after "Table").
DataTable dt = GetDataTableFromRemoteCsv(Url, IsHeadings);

Related

export to excel in c# where first row and 4 columns are in string & rest is in integer

/// <summary>
/// Builds the report table, saves it as an .xlsx workbook with ClosedXML and streams
/// the workbook to the browser as a download named after the selected date range.
/// </summary>
/// <param name="sender">Control that raised the click.</param>
/// <param name="e">Click event data (unused).</param>
protected void ibtnExport_Click(object sender, ImageClickEventArgs e)
{
    try
    {
        DataTable dt = createDataTable();
        dt.Columns.RemoveAt(0);
        string rangeLabel = lblStartDate.Text + "to" + lblEndDate.Text;

        HttpResponse httpResponse = Response;
        using (ClosedXML.Excel.XLWorkbook wbook = new ClosedXML.Excel.XLWorkbook())
        {
            wbook.Worksheets.Add(dt, rangeLabel);

            // Prepare the response
            httpResponse.Clear();
            httpResponse.ContentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            //Provide you file name here
            httpResponse.AddHeader("content-disposition", "attachment;filename=\"" + rangeLabel + ".xlsx\"");

            // The HTML <style> block that used to be written here has been removed: the body of
            // this response is a binary .xlsx file, so any text written ahead of it ends up
            // inside the downloaded file.

            // Flush the workbook to the Response.OutputStream
            using (MemoryStream memoryStream = new MemoryStream())
            {
                wbook.SaveAs(memoryStream);
                memoryStream.WriteTo(httpResponse.OutputStream);
            }
        }
        httpResponse.End();
    }
    catch (System.Threading.ThreadAbortException)
    {
        // Response.End() finishes the request by aborting the thread; that is the success
        // path and must not be reported to the user as a failure.
        throw;
    }
    catch
    {
        Response.Write("<script>alert('Something went wrong..')</script>");
    }
}
This is the code I am using to export to Excel. The header row is exported as strings, which is fine, but the rest of each column is also exported as strings. I need those cells to be numbers (integers) so that they can be summed; as it stands, it is hectic for the user to perform any calculation on them.
Please do help me to resolve this. If you need any other part of the code please ask.
Edit:
This is the createDataTable code:
/// <summary>
/// Builds the export table from the DataSet stored in ViewState["DataSet"]:
/// five fixed columns, one column per row of Tables[1] named "Name(SapCode)",
/// one row per row of Tables[0], and hour values copied in from Tables[2].
/// </summary>
/// <returns>The assembled table.</returns>
private DataTable createDataTable()
{
    DataSet source = (DataSet)ViewState["DataSet"];
    DataTable dt = new DataTable();
    foreach (string fixedColumn in new string[] { "ProjectID", "Projects", "Network", "Activity", "SOP" })
    {
        dt.Columns.Add(fixedColumn);
    }

    // One extra column per employee row.
    DataTable employees = source.Tables[1];
    foreach (DataRow employee in employees.Rows)
    {
        dt.Columns.Add(employee["Name"].ToString() + "(" + employee["SapCode"].ToString() + ")");
    }

    // NOTE(review): six values are supplied while only five fixed columns exist, so the
    // sixth value lands in the first employee column - confirm this alignment is intended.
    DataTable projects = source.Tables[0];
    foreach (DataRow project in projects.Rows)
    {
        dt.Rows.Add(
            project["ProjectID"].ToString(),
            project["BaseProject"].ToString(),
            project["ProjectName"].ToString(),
            project["Network"].ToString(),
            project["Activity"].ToString(),
            project["SOP"].ToString());
    }

    DataTable hours = source.Tables[2];
    foreach (DataRow entry in hours.Rows)
    {
        for (int i = 0; i < projects.Rows.Count; i++)
        {
            string employeeColumn = entry["EmployeeData"].ToString();
            if (!dt.Columns.Contains(employeeColumn))
            {
                continue;
            }
            bool sameProject = dt.Rows[i]["ProjectID"].ToString() == entry["ProjectID"].ToString();
            if (sameProject && dt.Columns[employeeColumn].ColumnName.ToString() == employeeColumn)
            {
                dt.Rows[i][employeeColumn] = Convert.ToDouble(entry["Hourspent"].ToString());
            }
        }
    }
    return dt;
}

I am trying to export an excel and make it password protected. My code is given below.But i am getting error

I am trying to export an excel and make it password protected.
My code is given below.
But i am getting error:
Excel completed file level validation and repair.
Some parts of this workbook may have been repaired or discarded.
I DON'T KNOW WHAT I AM DOING WRONG .
In-case i do it without the save As line for package then this error doesn't appear.
In my controller:
/// <summary>
/// Exports the Creditsudhar leads to a workbook under ~/TempExcel and returns
/// the same workbook to the client as a file download.
/// </summary>
/// <returns>A stream result carrying the generated .xlsx bytes.</returns>
[HttpGet]
public FileStreamResult ExportToExcel()
{
    _objService = new ServiceBAL();
    List<ReconcilationEntity> leads = _objService.GetCreditsudharLeads();

    // Make sure the temporary export folder exists.
    string tempFolder = Server.MapPath("~/TempExcel");
    if (!Directory.Exists(tempFolder))
    {
        System.IO.Directory.CreateDirectory(tempFolder);
    }

    // File name carries the current short date and short time with separators replaced by "_".
    string datePart = DateTime.Now.ToShortDateString().Replace("/", "_");
    string timePart = DateTime.Now.ToShortTimeString().Replace(" ", "_").Replace(":", "_").Trim();
    String FileName = "Creditsudhar_" + datePart + "_" + timePart + ".xlsx";
    String Filepath = tempFolder + "\\" + FileName;

    string[] columns =
    {
        "AffName", "AffPhone", "AffEmail", "ProductName", "ContactName", "Status", "CreatedOn", "Commission",
        "IsCommissionPaid", "Accountname", "AccountNumber", "BankName", "BankBranch", "IFSCCode", "PanNumber"
    };
    DataTable leadTable = ExcelExportHelper.ListToDataTable(leads);
    string sheetHeading = "Creditsudhar Reconcillation Sheet " + DateTime.Now.ToShortDateString();
    byte[] workbookBytes = ExcelExportHelper.ExportExcel(leadTable, Filepath, sheetHeading, true, columns);

    MemoryStream memStream = new MemoryStream(workbookBytes);
    return this.File(memStream, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", FileName);
}
/// <summary>
/// MIME type string for Open XML spreadsheet (.xlsx) responses.
/// </summary>
public static string ExcelContentType
{
    get
    {
        const string openXmlSpreadsheet = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
        return openXmlSpreadsheet;
    }
}
/// <summary>
/// Converts a list of objects into a DataTable with one column per property of
/// <typeparamref name="T"/> and one row per list item.
/// </summary>
/// <typeparam name="T">Item type whose properties define the columns.</typeparam>
/// <param name="data">Items to copy into the table.</param>
/// <returns>A new table; nullable property types are mapped to their underlying type.</returns>
public static DataTable ListToDataTable<T>(List<T> data)
{
    PropertyDescriptorCollection props = TypeDescriptor.GetProperties(typeof(T));
    DataTable table = new DataTable();

    foreach (PropertyDescriptor prop in props)
    {
        // A DataColumn is given the underlying type when the property is Nullable<X>.
        Type columnType = Nullable.GetUnderlyingType(prop.PropertyType);
        if (columnType == null)
        {
            columnType = prop.PropertyType;
        }
        table.Columns.Add(prop.Name, columnType);
    }

    object[] cells = new object[props.Count];
    foreach (T item in data)
    {
        int slot = 0;
        while (slot < cells.Length)
        {
            cells[slot] = props[slot].GetValue(item);
            slot++;
        }
        table.Rows.Add(cells);
    }
    return table;
}
/// <summary>
/// Writes <paramref name="dataTable"/> to a password-protected Excel workbook, stores the
/// workbook at <paramref name="Filepath"/> and returns the same bytes.
/// </summary>
/// <param name="dataTable">Data to export. When <paramref name="showSrNo"/> is true a "#" column is added to this table.</param>
/// <param name="Filepath">Target file; any existing file at this path is deleted first.</param>
/// <param name="heading">Optional sheet heading; when present the data starts on row 3.</param>
/// <param name="showSrNo">True to prepend a running serial-number column.</param>
/// <param name="columnsToTake">Names of the columns to keep; every other column is deleted from the sheet.</param>
/// <returns>The encrypted workbook bytes.</returns>
public static byte[] ExportExcel(DataTable dataTable, String Filepath, string heading = "", bool showSrNo = false, params string[] columnsToTake)
{
    DeleteUploadedFile(Filepath);
    using (ExcelPackage package = new ExcelPackage())
    {
        ExcelWorksheet workSheet = package.Workbook.Worksheets.Add(String.Format("{0} Data", heading));
        int startRowFrom = String.IsNullOrEmpty(heading) ? 1 : 3;
        if (showSrNo)
        {
            DataColumn dataColumn = dataTable.Columns.Add("#", typeof(int));
            dataColumn.SetOrdinal(0);
            int index = 1;
            foreach (DataRow item in dataTable.Rows)
            {
                item[0] = index;
                index++;
            }
        }
        // add the content into the Excel file
        workSheet.Cells["A" + startRowFrom].LoadFromDataTable(dataTable, true);
        // autofit width of cells with small content
        // The index advances on every iteration, so a failure on one column can no longer
        // shift the sizing of all the columns after it.
        for (int columnIndex = 1; columnIndex <= dataTable.Columns.Count; columnIndex++)
        {
            try
            {
                ExcelRange columnCells = workSheet.Cells[workSheet.Dimension.Start.Row, columnIndex, workSheet.Dimension.End.Row, columnIndex];
                // Empty cells have a null Value; treat them as zero-length.
                int maxLength = columnCells.Max(cell => cell.Value == null ? 0 : cell.Value.ToString().Length);
                if (maxLength < 150)
                {
                    workSheet.Column(columnIndex).AutoFit();
                }
            }
            catch (Exception ex)
            {
                if (!(ex is System.Threading.ThreadAbortException))
                {
                    // Column sizing is cosmetic, so the export continues; the failure is traced.
                    System.Diagnostics.Trace.TraceWarning("ExportExcel: could not auto-fit column {0}: {1}", columnIndex, ex);
                }
            }
        }
        // format header - bold, white text on a teal fill
        using (ExcelRange r = workSheet.Cells[startRowFrom, 1, startRowFrom, dataTable.Columns.Count])
        {
            r.Style.Font.Color.SetColor(System.Drawing.Color.White);
            r.Style.Font.Bold = true;
            r.Style.Fill.PatternType = OfficeOpenXml.Style.ExcelFillStyle.Solid;
            r.Style.Fill.BackgroundColor.SetColor(System.Drawing.ColorTranslator.FromHtml("#1fb5ad"));
        }
        // format cells - add borders
        using (ExcelRange r = workSheet.Cells[startRowFrom + 1, 1, startRowFrom + dataTable.Rows.Count, dataTable.Columns.Count])
        {
            r.Style.Border.Top.Style = ExcelBorderStyle.Thin;
            r.Style.Border.Bottom.Style = ExcelBorderStyle.Thin;
            r.Style.Border.Left.Style = ExcelBorderStyle.Thin;
            r.Style.Border.Right.Style = ExcelBorderStyle.Thin;
            r.Style.Border.Top.Color.SetColor(System.Drawing.Color.Black);
            r.Style.Border.Bottom.Color.SetColor(System.Drawing.Color.Black);
            r.Style.Border.Left.Color.SetColor(System.Drawing.Color.Black);
            r.Style.Border.Right.Color.SetColor(System.Drawing.Color.Black);
        }
        // removed ignored columns (right to left so the remaining indexes stay valid)
        for (int i = dataTable.Columns.Count - 1; i >= 0; i--)
        {
            if (i == 0 && showSrNo)
            {
                continue;
            }
            if (!columnsToTake.Contains(dataTable.Columns[i].ColumnName))
            {
                workSheet.DeleteColumn(i + 1);
            }
        }
        if (!String.IsNullOrEmpty(heading))
        {
            workSheet.Cells["A1"].Value = heading;
            workSheet.Cells["A1"].Style.Font.Size = 20;
            workSheet.InsertColumn(1, 1);
            workSheet.InsertRow(1, 1);
            workSheet.Column(1).Width = 5;
        }
        DeleteUploadedFile(Filepath);
        // NOTE(review): the password is hard-coded; consider supplying it from configuration.
        workSheet.Protection.SetPassword("myPassword");
        workSheet.Protection.IsProtected = true;
        workSheet.Protection.AllowSelectUnlockedCells = false;
        workSheet.Protection.AllowSelectLockedCells = false;
        // Serialise exactly once. The previous SaveAs(...) followed by GetAsByteArray() returned
        // bytes that Excel had to repair, because SaveAs closes the package. The encrypted bytes
        // are produced first and then written to disk, so the file and the return value match.
        byte[] ret = package.GetAsByteArray("myPassword");
        System.IO.File.WriteAllBytes(Filepath, ret);
        return ret;
    }
}
/// <summary>
/// Deletes the file at <paramref name="filePath"/> when it exists. Best-effort:
/// a failure is traced and never propagated to the caller.
/// </summary>
/// <param name="filePath">Path of the file to remove.</param>
public static void DeleteUploadedFile(String filePath)
{
    try
    {
        if (System.IO.File.Exists(filePath))
        {
            System.IO.File.Delete(filePath);
        }
    }
    catch (Exception ex)
    {
        // Still swallowed on purpose, but no longer silently.
        System.Diagnostics.Trace.TraceWarning("DeleteUploadedFile: could not delete '{0}': {1}", filePath, ex);
    }
}
/// <summary>
/// Generic convenience overload: converts <paramref name="data"/> to a DataTable and
/// passes it to the DataTable-based ExportExcel with the same remaining arguments.
/// </summary>
/// <returns>The bytes returned by the DataTable-based overload.</returns>
public static byte[] ExportExcel<T>(List<T> data, String Filepath, string Heading = "", bool showSlno = false, params string[] ColumnsToTake)
{
    DataTable table = ListToDataTable<T>(data);
    byte[] workbook = ExportExcel(table, Filepath, Heading, showSlno, ColumnsToTake);
    return workbook;
}
An answer mentioned that SaveAs closes the package, so the correct approach is to return the saved file as a byte array instead of calling GetAsByteArray afterwards. Alternatively, simply call GetAsByteArray(password) without calling SaveAs at all.

How can I get values from a csv file where some of the cells contain commas?

I have a script that imports a csv file and reads each line to update the corresponding item in Sitecore. It works for many of the products but the problem is for some products where certain cells in the row have commas in them (such as the product description).
/// <summary>
/// Reads the uploaded CSV file and, for every data row, copies the product, industry and
/// category ids onto each Sitecore product item whose display name equals the row's title.
/// </summary>
/// <param name="sender">Control that raised the click.</param>
/// <param name="e">Click event data (unused).</param>
protected void SubmitButton_Click(object sender, EventArgs e)
{
    if (UpdateFile.PostedFile == null)
    {
        return;
    }
    var file = UpdateFile.PostedFile;
    // check if valid csv file
    message.InnerText = "Updating...";
    Sitecore.Context.SetActiveSite("backedbybayer");
    _database = Database.GetDatabase("master");
    SitecoreContext context = new SitecoreContext(_database);
    Item homeNode = context.GetHomeItem<Item>();
    var productsItems =
        homeNode.Axes.GetDescendants()
            .Where(
                child =>
                    child.TemplateID == new ID(TemplateFactory.FindTemplateId<IProductDetailPageItem>()));
    try
    {
        using (StreamReader sr = new StreamReader(file.InputStream))
        // TextFieldParser (Microsoft.VisualBasic assembly) understands quoted fields, so a comma
        // inside "..." stays within its cell instead of shifting every later column index.
        using (var parser = new Microsoft.VisualBasic.FileIO.TextFieldParser(sr))
        {
            parser.SetDelimiters(",");
            parser.HasFieldsEnclosedInQuotes = true;
            // Keep field text exactly as supplied, as the previous Split-based code did.
            parser.TrimWhiteSpace = false;

            var firstLine = true;
            var productIdIndex = 0;
            var industryIdIndex = 0;
            var categoryIdIndex = 0;
            var titleIndex = 0;
            while (!parser.EndOfData)
            {
                var data = parser.ReadFields().ToList();
                if (firstLine)
                {
                    // find index of the important columns
                    productIdIndex = data.IndexOf("ProductId");
                    industryIdIndex = data.IndexOf("PrimaryIndustryId");
                    categoryIdIndex = data.IndexOf("PrimaryCategoryId");
                    titleIndex = data.IndexOf("Title");
                    firstLine = false;
                    continue;
                }
                string title = data[titleIndex];
                string productId = data[productIdIndex];
                string categoryIds = data[categoryIdIndex];
                string industryIds = data[industryIdIndex];
                var products = productsItems.Where(x => x.DisplayName == title);
                foreach (var product in products)
                {
                    product.Editing.BeginEdit();
                    try
                    {
                        product.Fields["Product Id"].Value = productId;
                        product.Fields["Product Industry Ids"].Value = industryIds;
                        product.Fields["Category Ids"].Value = categoryIds;
                    }
                    finally
                    {
                        product.Editing.EndEdit();
                    }
                }
            }
        }
        // when done
        message.InnerText = "Complete";
    }
    catch (Exception ex)
    {
        // Keep the user-facing message, but record the cause for diagnosis.
        System.Diagnostics.Trace.TraceError("SubmitButton_Click: CSV import failed: {0}", ex);
        message.InnerText = "Error reading file";
    }
}
The problem is that when a description field has commas, like "Product is an effective, preventative biofungicide," it gets split as well and throws off the index, so categoryIds = data[8] gets the wrong value.
The spreadsheet is data that is provided by our client, so I would rather not require the client to edit the file unless necessary. Is there a way I can handle this in my code? Is there a different way I can read the file that won't split everything by comma?
I suggest using ADO.NET. If a field's data is enclosed in quotes, the text driver parses it as a single field and ignores any commas inside it.
Code Example:
/// <summary>
/// Loads a CSV file into a DataTable through the Jet OLE DB text driver.
/// </summary>
/// <param name="path">Full path of the CSV file; its directory is the data source and its file name the table.</param>
/// <param name="isFirstRowHeader">True to pass HDR=Yes, false to pass HDR=No.</param>
/// <returns>The filled table.</returns>
static DataTable GetDataTableFromCsv(string path, bool isFirstRowHeader)
{
    string header = isFirstRowHeader ? "Yes" : "No";
    string pathOnly = Path.GetDirectoryName(path);
    string fileName = Path.GetFileName(path);
    // Verbatim strings are introduced with '@'; the '#' prefix used before does not compile.
    string sql = @"SELECT * FROM [" + fileName + "]";
    using (OleDbConnection connection = new OleDbConnection(
        @"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathOnly +
        ";Extended Properties=\"Text;HDR=" + header + "\""))
    using (OleDbCommand command = new OleDbCommand(sql, connection))
    using (OleDbDataAdapter adapter = new OleDbDataAdapter(command))
    {
        DataTable dataTable = new DataTable();
        dataTable.Locale = CultureInfo.CurrentCulture;
        adapter.Fill(dataTable);
        return dataTable;
    }
}

Populating a dataset from a CSV file

I would like to read the contents of a CSV file and create a dataset.
I am trying like this:
// The asker's attempt: split every line of test.csv on ';' and hand the result to a DataSet.
// As the question states, the last line is not correct as written.
var lines = File.ReadAllLines("test.csv").Select(a => a.Split(';'));
DataSet ds = new DataSet();
ds.load(lines);
but apparently this is not correct.
You need to add the reference Microsoft.VisualBasic.dll to use TextFieldParser Class.
/// <summary>
/// Reads a comma-delimited file (quoted fields allowed) into a DataTable. The first
/// record supplies the column names; empty fields are stored as null.
/// </summary>
/// <param name="csv_file_path">Path of the CSV file.</param>
/// <returns>
/// The table read so far. On failure the rows read before the error are still returned
/// and the error is traced; an empty file yields a table without columns.
/// </returns>
private static DataTable GetDataTabletFromCSVFile(string csv_file_path)
{
    DataTable csvData = new DataTable();
    try
    {
        using (TextFieldParser csvReader = new TextFieldParser(csv_file_path))
        {
            csvReader.SetDelimiters(new string[] { "," });
            csvReader.HasFieldsEnclosedInQuotes = true;
            string[] colFields = csvReader.ReadFields();
            if (colFields == null)
            {
                // Nothing to read: return the empty table explicitly instead of via an exception.
                return csvData;
            }
            foreach (string column in colFields)
            {
                DataColumn datecolumn = new DataColumn(column);
                datecolumn.AllowDBNull = true;
                csvData.Columns.Add(datecolumn);
            }
            while (!csvReader.EndOfData)
            {
                string[] fieldData = csvReader.ReadFields();
                //Making empty value as null
                for (int i = 0; i < fieldData.Length; i++)
                {
                    if (fieldData[i] == "")
                    {
                        fieldData[i] = null;
                    }
                }
                csvData.Rows.Add(fieldData);
            }
        }
    }
    catch (Exception ex)
    {
        // The best-effort contract is kept (no exception reaches the caller), but the
        // failure is now recorded instead of disappearing.
        System.Diagnostics.Trace.TraceError("GetDataTabletFromCSVFile: failed to read '{0}': {1}", csv_file_path, ex);
    }
    return csvData;
}
}
See this article for more info : http://www.morgantechspace.com/2013/08/how-to-read-data-from-csv-file-in-c.html
You need to run a SELECT statement against the CSV file to fill the dataset:
Edit: here's some sample code from http://carllbrown.blogspot.co.uk/2007/09/populate-dataset-from-csv-delimited_18.html
// Loads a delimited text file into a DataSet through the Jet OLE DB text driver.
// "..." is a placeholder for the full path of the CSV file.
string FileName = ...
// The data source is the file's directory; HDR=YES and FMT=Delimited are passed as extended properties.
OleDbConnection conn = new OleDbConnection
("Provider=Microsoft.Jet.OleDb.4.0; Data Source = " +
Path.GetDirectoryName(FileName) +
"; Extended Properties = \"Text;HDR=YES;FMT=Delimited\"");
conn.Open();
// The file name (without its directory) is used as the table name in the query.
OleDbDataAdapter adapter = new OleDbDataAdapter
("SELECT * FROM " + Path.GetFileName(FileName), conn);
DataSet ds = new DataSet("Temp");
adapter.Fill(ds);
conn.Close();
You can use Library like Fast CSV Reader then
using System.IO;
using LumenWorks.Framework.IO.Csv;
/// <summary>
/// Opens "data.csv" with the LumenWorks CsvReader (second constructor argument true)
/// and binds the reader to myDataRepeater.
/// </summary>
void ReadCsv()
{
    StreamReader fileReader = new StreamReader("data.csv");
    // open the file "data.csv" which is a CSV file with headers
    using (CsvReader csv = new CsvReader(fileReader, true))
    {
        myDataRepeater.DataSource = csv;
        myDataRepeater.DataBind();
    }
}
Comma (,) Problem Solved in This Code
Works Even If you add Commas(,) in between a cell
Reading CSV file CODE:
/// <summary>
/// Loads Book1.csv into a DataTable and shows it in dataGrid1. The first line supplies the
/// column names; cells that contain commas (and are therefore quoted) are re-joined after
/// the naive split on ','.
/// </summary>
public MainWindow()
{
    InitializeComponent();
    DataTable dtDataSource = new DataTable();
    // Verbatim strings are introduced with '@'; the '#' prefix used before does not compile.
    string[] fileContent = File.ReadAllLines(@"..\\Book1.csv");
    if (fileContent.Length > 0)
    {
        //Create data table columns dynamically
        string[] columns = fileContent[0].Split(',');
        for (int i = 0; i < columns.Length; i++)
        {
            dtDataSource.Columns.Add(columns[i]);
        }
        //Add row data dynamically
        for (int i = 1; i < fileContent.Length; i++)
        {
            string[] rowData = fileContent[i].Split(',');
            string[] realRowData = new string[columns.Length];
            //this region solves the problem of a cell containing ",".
            #region CommaSepProblem
            // K is the output cell index; it is bounded so a row with too many cells cannot overflow.
            for (int j = 0, K = 0; j < rowData.Length && K < realRowData.Length; j++, K++)
            {
                int quoteCount = rowData[j].Count(x => x == '"');
                if (quoteCount % 2 == 0)
                {
                    // Balanced quotes: the fragment is a complete cell.
                    realRowData[K] = quotesLogic(rowData[j]);
                    continue;
                }
                // Odd number of quotes: the cell was cut at a comma. Keep appending the following
                // fragments until the quotes balance or the row ends. A fresh buffer per cell
                // stops text from an earlier merged cell leaking into this one.
                StringBuilder collaboration = new StringBuilder(rowData[j]);
                while (quoteCount % 2 != 0 && j + 1 < rowData.Length)
                {
                    j++;
                    collaboration.Append(',').Append(rowData[j]);
                    quoteCount += rowData[j].Count(x => x == '"');
                }
                realRowData[K] = quotesLogic(collaboration.ToString());
            }
            #endregion
            dtDataSource.Rows.Add(realRowData);
        }
        dataGrid1.ItemsSource = dtDataSource.DefaultView;
    }
}
Method Need to be added:
/// <summary>
/// Converts one raw CSV cell to its value: when the cell is wrapped in double quotes the
/// wrapping quotes are removed and every escaped quote pair ("") becomes a single quote.
/// Cells that are not wrapped are returned unchanged.
/// </summary>
/// <param name="collaboration">Raw cell text as it appears in the file.</param>
/// <returns>The unquoted cell value.</returns>
string quotesLogic(string collaboration)
{
    // At least two characters are needed for an opening and a closing quote; a lone '"'
    // is returned as-is instead of failing.
    bool wrapped = collaboration.Length >= 2
        && collaboration.StartsWith("\"", StringComparison.Ordinal)
        && collaboration.EndsWith("\"", StringComparison.Ordinal);
    if (!wrapped)
    {
        return collaboration;
    }
    string inner = collaboration.Substring(1, collaboration.Length - 2);
    // Replace works on non-overlapping pairs, so a run of four quotes becomes two
    // (two escaped quotes) rather than being collapsed to one.
    return inner.Replace("\"\"", "\"");
}
If you just want to quickly create a DataTable filled with sample data from a CSV file (or pasted directly from Excel) to play around or prototype, then you can use my fork of Shan Carter's Mr. Data Converter -- I recently added the ability to output comma- and tab-delimited data to a C# DataTable.
http://thdoan.github.io/mr-data-converter/
I have written five methods below that will turn a Csv file into a DataTable.
They have been designed to take into account optional quote marks (e.g. " symbols) and to be as versatile as possible without using other libraries:
/// <summary>
/// Parses the CSV file at <paramref name="filePath"/> into a DataTable.
/// </summary>
/// <param name="filePath">Path handed to the TextFieldParser constructor.</param>
/// <param name="isHeadings">True to use the heading-aware helper, false to use the no-headings helper.</param>
/// <returns>The parsed table, or null when an exception was passed to HandleException.</returns>
public static DataTable GetDataTabletFromCSVFile(string filePath, bool isHeadings)
{
    DataTable table = null;
    try
    {
        using (TextFieldParser parser = new TextFieldParser(filePath))
        {
            table = isHeadings
                ? GetDataTableFromTextFieldParser(parser)
                : GetDataTableFromTextFieldParserNoHeadings(parser);
        }
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return table;
}
/// <summary>
/// Parses CSV text held in memory into a DataTable.
/// </summary>
/// <param name="csvBody">CSV content; it is written to an in-memory stream that the parser then reads.</param>
/// <param name="isHeadings">True to use the heading-aware helper, false to use the no-headings helper.</param>
/// <returns>The parsed table, or null when an exception was passed to HandleException.</returns>
public static DataTable GetDataTableFromCsvString(string csvBody, bool isHeadings)
{
    DataTable table = null;
    try
    {
        MemoryStream buffer = new MemoryStream();
        StreamWriter writer = new StreamWriter(buffer);
        writer.Write(csvBody);
        writer.Flush();
        // Rewind so the parser starts at the first byte that was written.
        buffer.Position = 0;
        using (TextFieldParser parser = new TextFieldParser(buffer))
        {
            table = isHeadings
                ? GetDataTableFromTextFieldParser(parser)
                : GetDataTableFromTextFieldParserNoHeadings(parser);
        }
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return table;
}
/// <summary>
/// Requests a CSV resource over HTTP and parses the response stream into a DataTable.
/// </summary>
/// <param name="url">Address passed to WebRequest.Create.</param>
/// <param name="isHeadings">True to use the heading-aware helper, false to use the no-headings helper.</param>
/// <returns>The parsed table, or null when an exception was passed to HandleException.</returns>
public static DataTable GetDataTableFromRemoteCsv(string url, bool isHeadings)
{
    DataTable table = null;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        HttpWebResponse response = (HttpWebResponse)request.GetResponse();
        StreamReader responseReader = new StreamReader(response.GetResponseStream());
        using (TextFieldParser parser = new TextFieldParser(responseReader))
        {
            table = isHeadings
                ? GetDataTableFromTextFieldParser(parser)
                : GetDataTableFromTextFieldParserNoHeadings(parser);
        }
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return table;
}
/// <summary>
/// Reads comma-delimited, optionally quoted records from the parser. The first record
/// supplies the column names; every later record becomes a row.
/// </summary>
/// <param name="textFieldParser">Parser positioned at the start of the data; this method configures its delimiter and quoting.</param>
/// <returns>The populated table, or null when an exception was passed to HandleException.</returns>
private static DataTable GetDataTableFromTextFieldParser(TextFieldParser textFieldParser)
{
    DataTable result = null;
    try
    {
        textFieldParser.SetDelimiters(new string[] { "," });
        textFieldParser.HasFieldsEnclosedInQuotes = true;

        string[] headings = textFieldParser.ReadFields();
        DataTable table = new DataTable();
        for (int c = 0; c < headings.Length; c++)
        {
            table.Columns.Add(new DataColumn(headings[c]) { AllowDBNull = true });
        }

        while (!textFieldParser.EndOfData)
        {
            string[] row = textFieldParser.ReadFields();
            for (int i = 0; i < row.Length; i++)
            {
                // Empty fields are stored as null rather than as empty strings.
                if (string.IsNullOrEmpty(row[i]))
                {
                    row[i] = null;
                }
            }
            table.Rows.Add(row);
        }
        result = table;
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return result;
}
/// <summary>
/// Reads comma-delimited, optionally quoted records from the parser when there is no heading row.
/// Columns are named "Column 0", "Column 1", ... from the width of the first record.
/// </summary>
/// <param name="textFieldParser">Parser positioned at the start of the data; this method configures its delimiter and quoting.</param>
/// <returns>The populated table, or null when an exception was passed to HandleException.</returns>
private static DataTable GetDataTableFromTextFieldParserNoHeadings(TextFieldParser textFieldParser)
{
    DataTable result = null;
    try
    {
        textFieldParser.SetDelimiters(new string[] { "," });
        textFieldParser.HasFieldsEnclosedInQuotes = true;

        DataTable table = new DataTable();
        bool columnsCreated = false;
        while (!textFieldParser.EndOfData)
        {
            string[] row = textFieldParser.ReadFields();
            if (!columnsCreated)
            {
                // The first record decides how many columns the table gets.
                int index = 0;
                while (index < row.Length)
                {
                    table.Columns.Add(new DataColumn("Column " + index) { AllowDBNull = true });
                    index++;
                }
                columnsCreated = true;
            }
            for (int i = 0; i < row.Length; i++)
            {
                // Empty fields are stored as null rather than as empty strings.
                if (string.IsNullOrEmpty(row[i]))
                {
                    row[i] = null;
                }
            }
            table.Rows.Add(row);
        }
        result = table;
    }
    catch (Exception ex)
    {
        ex.HandleException();
    }
    return result;
}
If, like me, you're saving from reporting services then you should use it like this:
// Out-parameters filled in by ServerReport.Render below.
Warning[] warnings;
string[] streamids;
string mimeType;
string encoding;
string filenameExtension;
// Render the report in "csv" format, decode the bytes as UTF-8 text, then parse it with a heading row.
byte[] bytes = rvMain.ServerReport.Render("csv", null, out mimeType, out encoding, out filenameExtension, out streamids, out warnings);
string CsvBody = System.Text.Encoding.UTF8.GetString(bytes);
DataTable dt = GetDataTableFromCsvString(CsvBody,true);
Otherwise, all you need do is:
// Parse CSV text that is already held in a string (CsvBody).
bool IsHeadings = true; //Does the data include a heading row?
DataTable dt = GetDataTableFromCsvString(CsvBody, IsHeadings);
Or to use directly from a csv file
bool IsHeadings = true; //Does the data include a heading row?
// The call must match the method's declared name exactly (C# identifiers are case-sensitive).
DataTable dt = GetDataTabletFromCSVFile(FilePath, IsHeadings);
Or to use a csv file that is stored remotely
bool IsHeadings = true; //Does the data include a heading row?
// The remote helper is declared as GetDataTableFromRemoteCsv (no "t" after "Table").
DataTable dt = GetDataTableFromRemoteCsv(Url, IsHeadings);
A Dataset is a collection of DataTables, so create one like so:
// Wrap the parsed DataTable (dt) in a new DataSet.
DataSet ds = new DataSet();
ds.Tables.Add(dt);

C# 2.0 Fastest way to parse Excel spreadsheet [duplicate]

This question already has answers here:
Closed 10 years ago.
Possible Duplicate:
Reading Excel files from C#
What is the fastest way to read large sets of data from excel from Csharp. Example code would be great . .
In our desktop environment, I have reached the best mix between performance, flexibility and stability by using Excel via COM.
Access to Excel is always via the same thread.
I use late-binding (in VB.Net) to make my app version independent.
The rest of the application is developed in C#, only this part and some other small parts are in VB, because they are easier in VB.Net.
' Late-bound access to Excel: every object is typed As Object.
Dim workBook As Object = GetObject(fileName)
Dim workSheet As Object = workBook.WorkSheets.Item(WorkSheetNr)
' Start at cell (1, 1) and take its CurrentRegion.
Dim range As Object = workSheet.Cells.Item(1, 1)
Dim range2 As Object = range.CurrentRegion
Dim rrow As Integer = range2.Row ' For XL97, first convert to integer. XL97 will generate an error '
Dim rcolumn As Integer = range2.Column
' Build the top-left and bottom-right corner cells from the region's position and size.
Dim top As Object = workSheet.Cells.Item(rrow, rcolumn)
Dim bottom As Object = top.Offset(range2.Rows.Count - 1, range2.Columns.Count - 1)
range = workSheet.Range(top, bottom)
' Read the whole range into a 2-dimensional array in a single assignment.
Dim values As Object(,)
values = range.Value
Here you have a 2-dimensional array containing the values from Excel. The last statement gets the data from Excel to .Net.
Since the limits on the size of a Excel sheet, these cannot get very large, so memory should not be a problem.
We have done some tests on performance, on multiple systems. It is optimized to create as few as possible (out-of-process) COM calls.
This way was the one that has given us the best performance, specially since the data is directly in an array, and access to this data is faster as going through a dataset.
Slow in this solution is starting Excel. But if you need to process multiple files, right after each other, the cost of starting Excel is made only once.
Also I would not use this solution in a server environment.
/// <summary>
/// Header texts that ExcelParser looks for when locating the header row of a sheet,
/// plus the number of header columns that must be found.
/// </summary>
public class ExcelHeaderValues
{
// Header cell texts; ExcelParser compares them against upper-cased, trimmed cell values.
public static string CUSIP = "CUSIP";
public static string ORIG = "ORIG";
public static string PRICE = "PRICE";
// Number of matching header cells a row needs before it is accepted as the header row.
public static int COLUMNCOUNT = 3;
}
/// <summary>
/// Reads every usable sheet of an Excel workbook through OLE DB, locates the header row that
/// contains the CUSIP, ORIG and PRICE columns, and collects one pipe-delimited string per
/// qualifying data row.
/// </summary>
public class ExcelParser
{
    // Output lines of the form "CUSIP|ORIG*1000000|PRICE|||||".
    private ArrayList collOutput = null;
    string sSheetName = String.Empty;
    // Entries of the form "columnName^HEADER" for the matched header cells (plus a trailing empty entry).
    string[] strValidColumn;
    // Index of the header row within the current sheet; data rows are read starting from it.
    int validRowCnt = 0;
    string sColumnPositions = String.Empty;
    OleDbCommand ExcelCommand;
    OleDbDataAdapter ExcelAdapter;
    OleDbConnection ExcelConnection = null;
    DataSet dsSheet = null;
    // NOTE(review): nothing in this class assigns path, so GetConnectionString always sees "" - confirm how it is meant to be set.
    string path = string.Empty;
    string identifier = string.Empty;

    public ExcelParser()
    {
        collOutput = new ArrayList();
    }

    /// <summary>
    /// Walks all tables listed in the workbook schema and appends the extracted rows to the
    /// internal output list. The OLE DB connection opened for the schema is closed on exit.
    /// </summary>
    public void Extract()
    {
        bool headermatch = false;
        string strCusip = string.Empty, strOrig = string.Empty, strPrice = string.Empty, strData = string.Empty;
        string strCusipPos = string.Empty, strPricePos = string.Empty, strOrigPos = string.Empty;
        string strColumnHeader = String.Empty;
        int reqColcount = 0;
        string[] strTemp;
        bool bTemp = false;
        // NOTE(review): validRow is never reset between sheets - confirm that is intended.
        bool validRow = false;
        DataTable schemaTable = GetSchemaTable();
        try
        {
            validRowCnt = 0;
            foreach (DataRow dr in schemaTable.Rows)
            {
                if (dsSheet != null)
                {
                    dsSheet.Reset();
                    dsSheet = null;
                }
                strCusipPos = string.Empty;
                strOrigPos = string.Empty;
                strPricePos = string.Empty;
                if (isValidSheet(dr))
                {
                    sColumnPositions = string.Empty;
                    validRowCnt = 0;
                    // Scan rows from the top until one contains all required header cells.
                    foreach (DataRow dataRow in dsSheet.Tables[0].Rows)
                    {
                        sColumnPositions = string.Empty;
                        if (headermatch == false)
                        {
                            sColumnPositions = string.Empty;
                            foreach (DataColumn column in dsSheet.Tables[0].Columns)
                            {
                                strColumnHeader = dataRow[column.ColumnName].ToString().ToUpper().Trim();
                                strColumnHeader = strColumnHeader.ToUpper();
                                if (strColumnHeader == ExcelHeaderValues.ORIG.ToUpper() || strColumnHeader == ExcelHeaderValues.CUSIP.ToUpper() || strColumnHeader == ExcelHeaderValues.PRICE.ToUpper())
                                {
                                    bTemp = true;
                                    validRow = true;
                                    reqColcount = ExcelHeaderValues.COLUMNCOUNT;
                                }
                                if (bTemp)
                                {
                                    bTemp = false;
                                    sColumnPositions += column.ColumnName + "^" + strColumnHeader + ";";
                                }
                            }
                            // The trailing ';' yields one empty entry, hence the "Length - 1" comparisons.
                            strValidColumn = sColumnPositions.Trim().Split(';');
                            if (validRow == true && reqColcount == strValidColumn.Length - 1)
                            {
                                headermatch = true;
                                break;
                            }
                            validRowCnt++;
                        }
                    }
                    if (headermatch == true)
                    {
                        // The former try/catch here only did "throw ex;", which reset the stack
                        // trace; exceptions now propagate untouched.
                        if (dsSheet.Tables[0].Rows.Count > 0)
                        {
                            if (strValidColumn.Length > 0)
                            {
                                // Recover the sheet column name for each required header.
                                for (int i = 0; i < strValidColumn.Length - 1; i++)
                                {
                                    if (strValidColumn[i].ToUpper().Contains("CUSIP"))
                                    {
                                        strTemp = strValidColumn[i].ToString().Split('^');
                                        strCusipPos = strTemp[0].ToString();
                                        strTemp = null;
                                    }
                                    if (strValidColumn[i].ToUpper().Contains("PRICE"))
                                    {
                                        strTemp = strValidColumn[i].ToString().Split('^');
                                        strPricePos = strTemp[0].ToString();
                                        strTemp = null;
                                    }
                                    if (strValidColumn[i].ToUpper().Contains("ORIG"))
                                    {
                                        strTemp = strValidColumn[i].ToString().Split('^');
                                        strOrigPos = strTemp[0].ToString();
                                        strTemp = null;
                                    }
                                }
                            }
                            for (int iData = validRowCnt; iData < dsSheet.Tables[0].Rows.Count; iData++)
                            {
                                if (strCusipPos.Trim() != "")
                                    strCusip = dsSheet.Tables[0].Rows[iData][strCusipPos].ToString().Trim();
                                if (strOrigPos.Trim() != "")
                                    strOrig = dsSheet.Tables[0].Rows[iData][strOrigPos].ToString().Trim();
                                if (strPricePos.Trim() != "")
                                    strPrice = dsSheet.Tables[0].Rows[iData][strPricePos].ToString().Trim().ToUpper();
                                strData = "";
                                // Keep rows with a 9-character CUSIP, non-empty ORIG and PRICE, and a price not containing "SOLD".
                                if (strCusip.Length == 9 && strCusip != "" && strPrice != "" && strOrig != "" && !strPrice.ToUpper().Contains("SOLD"))
                                    strData = strCusip + "|" + Convert.ToDouble(strOrig) * 1000000 + "|" + strPrice + "|||||";
                                if (strData != null && strData != "")
                                    collOutput.Add(strData);
                                strCusip = string.Empty;
                                strOrig = string.Empty;
                                strPrice = string.Empty;
                                strData = string.Empty;
                            }
                        }
                    }
                    headermatch = false;
                    sColumnPositions = string.Empty;
                    strColumnHeader = string.Empty;
                }
            }
        }
        finally
        {
            // GetSchemaTable opened this connection; release it once all sheets are processed.
            if (ExcelConnection != null)
            {
                ExcelConnection.Dispose();
                ExcelConnection = null;
            }
        }
    }

    /// <summary>
    /// Decides from a schema row whether the sheet should be processed and, if so, loads it into dsSheet.
    /// </summary>
    /// <param name="dr">Schema row; index 2 is read as the sheet name and index 3 as the table type.</param>
    /// <returns>True when the sheet passed the name filters and was loaded.</returns>
    /// <exception cref="Exception">Loading the sheet failed; the original error is the inner exception.</exception>
    private bool isValidSheet(DataRow dr)
    {
        bool isValid = false;
        sSheetName = dr[2].ToString().ToUpper();
        if (!(sSheetName.Contains("$_")) && !(sSheetName.Contains("$'_")) && (!sSheetName.Contains("Print_Titles".ToUpper())) && (dr[3].ToString() == "TABLE" && ((!sSheetName.Contains("Print_Area".ToUpper())))) && !(sSheetName.ToUpper() == "DLOFFERLOOKUP"))
        {
            if (sSheetName.Trim().ToUpper() != "Disclaimer$".ToUpper() && sSheetName.Trim().ToUpper() != "Summary$".ToUpper() && sSheetName.Trim().ToUpper() != "FORMULAS$")
            {
                string sQry = "SELECT * FROM [" + sSheetName + "]";
                ExcelCommand = new OleDbCommand(sQry, ExcelConnection);
                dsSheet = new DataSet();
                ExcelAdapter = new OleDbDataAdapter(ExcelCommand);
                try
                {
                    ExcelAdapter.Fill(dsSheet, sSheetName);
                    isValid = true;
                }
                catch (Exception ex)
                {
                    // Same exception type and message as before, but the original error is
                    // kept as InnerException so its type and stack trace are not lost.
                    throw new Exception(ex.Message, ex);
                }
                finally
                {
                    ExcelAdapter = null;
                    ExcelCommand = null;
                }
            }
        }
        return isValid;
    }

    /// <summary>
    /// Opens the workbook connection and returns the schema rows of type "TABLE".
    /// The connection stays open for the sheet queries that follow.
    /// </summary>
    /// <returns>The OLE DB schema table.</returns>
    private DataTable GetSchemaTable()
    {
        string connectionString = GetConnectionString();
        ExcelConnection = new OleDbConnection(connectionString);
        try
        {
            ExcelConnection.Open();
            return ExcelConnection.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" });
        }
        catch
        {
            // Do not keep a half-initialised connection around; rethrow with the stack trace intact.
            ExcelConnection.Dispose();
            ExcelConnection = null;
            throw;
        }
    }

    /// <summary>
    /// Builds the OLE DB connection string for the workbook from the text found in its path.
    /// </summary>
    /// <returns>A connection string for .xlsx, .xlsm or .xls, otherwise one using "HTML Import".</returns>
    private string GetConnectionString()
    {
        string lowerPath = path.ToLower();
        // Order matters: ".xls" is also contained in ".xlsx" and ".xlsm", so it is tested last.
        if (lowerPath.Contains(".xlsx"))
        {
            return "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='Excel 12.0;HDR=No;IMEX=1;'";
        }
        if (lowerPath.Contains(".xlsm"))
        {
            return "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='Excel 12.0 Macro;HDR=No;IMEX=1;'";
        }
        if (lowerPath.Contains(".xls"))
        {
            return "provider=Microsoft.Jet.OLEDB.4.0;Data Source='" + path + "';Extended Properties='Excel 8.0;HDR=No;IMEX=1;'";
        }
        return "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='HTML Import;IMEX=1;HDR=No;'";
    }
}

Categories

Resources