I am trying to bulk insert a CSV file into a SQL Server database.
The process is .CSV file to DataTable to SqlBulkCopy to SQL Server.
When I run this I get this error:
The given ColumnMapping does not match up with any column in the source or destination
When I import the CSV directly into SQL Server via Management Studio it works! So I think the problem is in my DataTable conversion.
Here is my code to go from .CSV to DataTable:
public DataTable CsvFileToDataTable(string filePath)
{
var csvData = new DataTable("Table1");
using (var csvReader = new TextFieldParser(filePath))
{
csvReader.SetDelimiters(new string[] { "," });
csvReader.HasFieldsEnclosedInQuotes = true;
var readFields = csvReader.ReadFields();
foreach (var dataColumn in readFields.Select(column => new DataColumn(column) {AllowDBNull = true }))
{
csvData.Columns.Add(dataColumn);
}
while (!csvReader.EndOfData)
{
var data = csvReader.ReadFields();
for (var i = 0; i < data.Length; i++)
{
if (data[i] == "")
{
data[i] = null;
}
}
csvData.Rows.Add(data);
}
}
return csvData;
}
And here is the code for the bulk copy insert:
public void InsertData(DataTable table)
{
using (var transactionScope = new TransactionScope())
{
using (var sqlConnection = new SqlConnection(this.ConnectionString))
{
sqlConnection.Open();
using (var sqlBulkCopy = new SqlBulkCopy(sqlConnection))
{
sqlBulkCopy.DestinationTableName = table.TableName;
foreach (var column in table.Columns)
{
sqlBulkCopy.ColumnMappings.Add(column.ToString(), column.ToString());
}
sqlBulkCopy.WriteToServer(table);
}
transactionScope.Complete();
}
}
}
Does anyone have any suggestions?
Thanks
It turns out that if I tweaked each method it worked. In the first method I gave every column an explicit string type and a default value:
foreach (var dataColumn in readFields.Select(column => new DataColumn(column, typeof(string)) { AllowDBNull = true, DefaultValue = string.Empty }))
{
csvData.Columns.Add(dataColumn);
}
and in the second method I changed the column mappings to use ordinal indexes rather than column names:
for (var count = 0; count < table.Columns.Count; count++)
{
sqlBulkCopy.ColumnMappings.Add(count, count);
}
I was able to use these code samples to create a working solution that reads a CSV, checks that it contains data, and if it does, truncates the destination table and imports all the CSV data:
private static void ProcessFile(string FilePath, string TableName)
{
var dt = GetDataTable(FilePath, TableName);
if (dt == null)
{
return;
}
if (dt.Rows.Count == 0)
{
AuditLog.AddInfo("No rows imported after reading file " + FilePath);
return;
}
ClearData(TableName);
InsertData(dt);
}
private static DataTable GetDataTable(string FilePath, string TableName)
{
var dt = new DataTable(TableName);
using (var csvReader = new TextFieldParser(FilePath))
{
csvReader.SetDelimiters(new string[] { "," });
csvReader.HasFieldsEnclosedInQuotes = true;
var readFields = csvReader.ReadFields();
if (readFields == null)
{
AuditLog.AddInfo("Could not read header fields for file " + FilePath);
return null;
}
foreach (var dataColumn in readFields.Select(column => new DataColumn(column, typeof(string)) { AllowDBNull = true, DefaultValue = string.Empty }))
{
dt.Columns.Add(dataColumn);
}
while (!csvReader.EndOfData)
{
var data = csvReader.ReadFields();
if (data == null)
{
AuditLog.AddInfo(string.Format("Could not read fields on line {0} for file {1}", csvReader.LineNumber, FilePath));
continue;
}
var dr = dt.NewRow();
for (var i = 0; i < data.Length; i++)
{
if (!string.IsNullOrEmpty(data[i]))
{
dr[i] = data[i];
}
}
dt.Rows.Add(dr);
}
}
return dt;
}
private static void ClearData(string TableName)
{
SqlHelper.ExecuteNonQuery(ConfigurationUtil.ConnectionString, CommandType.Text, "TRUNCATE TABLE " + TableName);
}
private static void InsertData(DataTable table)
{
using (var sqlBulkCopy = new SqlBulkCopy(ConfigurationUtil.ConnectionString))
{
sqlBulkCopy.DestinationTableName = table.TableName;
for (var count = 0; count < table.Columns.Count; count++)
{
sqlBulkCopy.ColumnMappings.Add(count, count);
}
sqlBulkCopy.WriteToServer(table);
}
}
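Hypothetical usage of the above, assuming the destination table already exists and its column order matches the CSV header order (the path and table name here are made up):
ProcessFile(@"C:\imports\customers.csv", "dbo.Customers");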
I am using SqlBulkCopy to insert 10,000+ records into my DB, but it won't work because of the DbGeography point.
I keep getting this exception:
"Specified type is not registered on the target server. System.Data.Entity.Spatial.DbGeography".
This is my code.
public void AddBulkRange(string tableName, List<TEntity> entities)
{
using (var context = new TContext())
{
context.Configuration.AutoDetectChangesEnabled = false;
context.Configuration.ValidateOnSaveEnabled = false;
string connectionString = context.Database.Connection.ConnectionString;
using (var connection = new SqlConnection(connectionString))
{
connection.Open();
SqlTransaction transaction = connection.BeginTransaction();
using (var bulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.Default, transaction))
{
bulkCopy.BatchSize = 2000;
bulkCopy.DestinationTableName = "dbo." + tableName;
try
{
DataTable data = DataReaderConverterHelper.ToDataTable(entities);
//This just explicitly maps the columns in sqlBulkCopy to the table columns
foreach (DataColumn column in data.Columns)
{
bulkCopy.ColumnMappings.Add(new SqlBulkCopyColumnMapping(column.ColumnName, column.ColumnName));
}
bulkCopy.WriteToServer(data);
// Commit inside the try block so a rollback is never followed by a commit
transaction.Commit();
}
catch (Exception e)
{
transaction.Rollback();
connection.Close();
}
}
}
}
}
This is the ToDataTable Method.
public static DataTable ToDataTable<TEntity>(this IList<TEntity> data)
{
PropertyDescriptorCollection properties = TypeDescriptor.GetProperties(typeof(TEntity));
DataTable dt = new DataTable();
for (int i = 0; i < properties.Count; i++)
{
PropertyDescriptor property = properties[i];
dt.Columns.Add(property.Name, Nullable.GetUnderlyingType(
property.PropertyType) ?? property.PropertyType);
}
object[] values = new object[properties.Count];
foreach (TEntity item in data)
{
for (int i = 0; i < values.Length; i++)
{
values[i] = properties[i].GetValue(item);
}
dt.Rows.Add(values);
}
return dt;
}
The problem is the DbGeography point is throwing the exception.
The field type in my model.
public System.Data.Entity.Spatial.DbGeography geography_point { get; set; }
In the DB the field type is Geography.
I can insert if I use the method below, but then I get a timeout error; that's why I want to use SqlBulkCopy. As I said above, though, it doesn't work because of that one data type.
public List<TEntity> AddRange(List<TEntity> entities)
{
int takeCount = 100;
int counter = (entities.Count % takeCount) == 0
? (entities.Count / takeCount)
: (entities.Count / takeCount) + 1;
for (int i = 0; i < counter; i++)
{
using (var context = new TContext())
{
List<TEntity> subList = entities.Skip(i * takeCount).Take(takeCount).ToList();
context.Configuration.AutoDetectChangesEnabled = false;
context.Configuration.ValidateOnSaveEnabled = false;
context.Set<TEntity>().AddRange(subList);
context.SaveChanges();
context.Dispose();
}
}
return entities;
}
Thanks to some input from @AlwaysLearning I made some changes and got the DbGeography working with the DataTable.
public static DataTable ToDataTable<TEntity>(this IList<TEntity> data)
{
PropertyDescriptorCollection properties = TypeDescriptor.GetProperties(typeof(TEntity));
DataTable dt = new DataTable();
for (int i = 0; i < properties.Count; i++)
{
PropertyDescriptor property = properties[i];
if (property.PropertyType.Name == "DbGeography")
{
dt.Columns.Add(property.Name, typeof(SqlGeography));
continue;
}
dt.Columns.Add(property.Name, Nullable.GetUnderlyingType(
property.PropertyType) ?? property.PropertyType);
}
object[] values = new object[properties.Count];
foreach (TEntity item in data)
{
for (int i = 0; i < values.Length; i++)
{
if (properties[i].PropertyType.Name == "DbGeography")
{
DbGeography Point = (DbGeography)properties[i].GetValue(item);
SqlGeography newGeography = SqlGeography.Parse(Point.AsText()).MakeValid();
values[i] = newGeography;
continue;
}
values[i] = properties[i].GetValue(item);
}
dt.Rows.Add(values);
}
return dt;
}
All I did was convert the DbGeography data type to the SqlGeography type and it worked perfectly.
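One caveat: SqlGeography.Parse assumes SRID 4326. If your DbGeography values can carry a different coordinate system, a sketch of the loop body that preserves the source SRID instead (still using Microsoft.SqlServer.Types, plus SqlChars from System.Data.SqlTypes) would be:
DbGeography point = (DbGeography)properties[i].GetValue(item);
// STGeomFromText takes the SRID explicitly instead of assuming 4326
SqlGeography newGeography = SqlGeography.STGeomFromText(
    new SqlChars(point.AsText()), point.CoordinateSystemId).MakeValid();
values[i] = newGeography;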
public void DecryptFile(string filePath, string CompanyName)
{
using (var fileStream = new FileStream(filePath, FileMode.Open))
{
using (var gzStream = new GZipStream(fileStream, CompressionMode.Decompress))
{
using (var outputStream = new MemoryStream())
{
gzStream.CopyTo(outputStream);
byte[] outputBytes = outputStream.ToArray();
// DeserailizeByteArrayToDatatable(outputBytes, CompanyName);
string FileContents = Encoding.ASCII.GetString(outputBytes);
DataTable dt = new DataTable();
string[] arrayList = FileContents.Split('\n');
}
}
}
}
I have decrypted the contents of the file and now I want to load the data into a DataTable. The format of the file contents is:
serial_number,peak_period_start_time,stop_time
15AA01AF361903PC,1602619200,1602615600
15AA01AF361902QB,1602619200,1602615600
15AA01AF361906YL,1602619200,1602630000
09AA01AF32190YXP,1602619200,1602630000
So I want the first line to become the columns of the DataTable and all the remaining lines to become rows, and additionally I want to change the start_time and stop_time values to the correct format.
I would suggest first creating a method that accepts a DataTable and an array of column names and adds the columns to the DataTable:
public DataTable AddColumns(DataTable dt, string[] columns)
{
if (dt == null) dt = new DataTable();
foreach (string column in columns)
{
dt.Columns.Add(column.Trim());
}
return dt;
}
Then create a method that populates the data into that DataTable:
public DataTable PopulateData(DataTable dt, string[] dataLines)
{
if (dataLines == null || dataLines.Length == 0) return dt;
foreach (string line in dataLines)
{
var splittedLine = line.Split(',');
var row = dt.NewRow();
for (int i = 0; i < splittedLine.Length; i++)
{
row[i] = splittedLine[i].Trim();
}
dt.Rows.Add(row);
}
return dt;
}
Call the methods above in your DecryptFile method, like below:
public void DecryptFile(string filePath, string CompanyName)
{
using (var fileStream = new FileStream(filePath, FileMode.Open))
{
using (var gzStream = new GZipStream(fileStream, CompressionMode.Decompress))
{
using (var outputStream = new MemoryStream())
{
gzStream.CopyTo(outputStream);
byte[] outputBytes = outputStream.ToArray();
// DeserailizeByteArrayToDatatable(outputBytes, CompanyName);
string FileContents = Encoding.ASCII.GetString(outputBytes);
DataTable dt = new DataTable();
string[] arrayList = FileContents.Split('\n');
// Usage
if (arrayList.Length > 0)
{
var columns = arrayList[0].Split(',');
var data = arrayList.AsEnumerable().Skip(1).ToArray(); // skip the first line.
dt = PopulateData(AddColumns(dt, columns), data);
}
}
}
}
}
Note: the number of values on each data line must match the number of columns created from the header row; otherwise you will need to handle the row creation differently.
The general concept I would follow would be to:
Read the first row and determine which and how many columns need to be created.
For each column, call newTable.Columns.Add() to add the column to the DataTable.
For each row that follows, call newTable.Rows.Add(), filling in the data.
Something like:
public void DecryptFile(string filePath, string CompanyName)
{
using (var fileStream = new FileStream(filePath, FileMode.Open))
{
using (var gzStream = new GZipStream(fileStream, CompressionMode.Decompress))
{
using (var outputStream = new MemoryStream())
{
gzStream.CopyTo(outputStream);
byte[] outputBytes = outputStream.ToArray();
//DeserailizeByteArrayToDatatable(outputBytes, CompanyName);
string FileContents = Encoding.ASCII.GetString(outputBytes);
DataTable newTable = new DataTable();
string[] arrayList = FileContents.Split('\n');
int rowIndex = 0;
foreach (string line in arrayList)
{
// Assuming comma separated columns
string[] columns = line.Split(',');
int columnIndex = 0;
DataRow row = (rowIndex == 0 ? null : newTable.NewRow());
foreach (string columnValue in columns)
{
if (rowIndex == 0)
{
newTable.Columns.Add(new DataColumn(columnValue));
}
else
{
row[columnIndex] = columnValue;
}
columnIndex++;
}
// Add the populated row to the table
if (row != null)
{
newTable.Rows.Add(row);
}
rowIndex++;
}
}
}
}
(have not tested the code)
You can create the data table by using the function below. I am not sure what format you want for the start time and stop time, so I have noted in comments where you can do that formatting.
public static DataTable ArrayOfStringToDataTable(string[] stringArray)
{
DataTable dataTable = new DataTable();
if (stringArray.Length == 0)
{
return dataTable;
}
var headers = stringArray[0].Split(',');
foreach (var header in headers)
{
dataTable.Columns.Add(header, typeof(string));
}
if (stringArray.Length == 1)
{
return dataTable;
}
for (var i = 1; i < stringArray.Length; i++)
{
var rows = stringArray[i].Split(',');
var dataRow = dataTable.NewRow();
dataRow[0] = rows[0];
dataRow[1] = rows[1]; // do required formatting
dataRow[2] = rows[2]; // do required formatting
dataTable.Rows.Add(dataRow);
}
return dataTable;
}
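As for the time formatting itself, the start/stop values in the sample look like Unix epoch seconds (e.g. 1602619200), so a sketch of a helper you could call at the lines marked "do required formatting" might be:
// Assumes the value is Unix epoch seconds; adjust the format string as needed
private static string FormatEpoch(string epochSeconds)
{
    return DateTimeOffset.FromUnixTimeSeconds(long.Parse(epochSeconds))
        .UtcDateTime.ToString("yyyy-MM-dd HH:mm:ss");
}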
I'm reading a CSV file with a MemoryStream to insert all its data into the DataTable.
Why is the inserted data in the database out of order compared to my DataTable in code?
This is a sample of the code:
using (Stream stream = new MemoryStream(bytes))
{
using (TextFieldParser reader2 = new TextFieldParser(stream, Encoding.GetEncoding(1256)))
{
string lineString = "";
var firstLine = reader2.ReadLine();
lineString = reader2.ReadToEnd();
var largeText = lineString.Split(this.RecordDelimiter);
foreach (var item in largeText)
{
if (!string.IsNullOrEmpty(item))
{
DataRow row = dt.NewRow();
row[0] = _databaseRepository.GetSequencerNextVal("cms_batchfile_records_seq");
row[1] = Convert.ToInt32(requestId);
row[2] = DateAndTime.GetServerDate();
row[3] = DateAndTime.GetServerTime();
row[4] = null;
row[5] = null;
row[6] = Convert.ToInt32(formatId);
row[7] = (int)BatchFileRecords_ProcessStatusEnum.NotProcess;
row[8] = null;
row[9] = item;
dt.Rows.Add(row);
_log.Info("Service"+item);
}
}
}
}
_batchFileRecordsRepository.InsertDatas(dt);
public void InsertDatas(DataTable dt)
{
try
{
BatchFileRecords item;
for (int i = 0; i < dt.Rows.Count; i++)
{
item = new BatchFileRecords()
{
Record_id = Convert.ToInt32(dt.Rows[i].ItemArray[0]),
Request_id = Convert.ToInt32(dt.Rows[i].ItemArray[1]),
Record_date = dt.Rows[i].ItemArray[2].ToString(),
Record_time = dt.Rows[i].ItemArray[3].ToString(),
Process_date = dt.Rows[i].ItemArray[4].ToString(),
Process_time = dt.Rows[i].ItemArray[5].ToString(),
Dataformat_id = Convert.ToInt32(dt.Rows[i].ItemArray[6]),
Processstatus = Convert.ToInt32(dt.Rows[i].ItemArray[7]),
Processresult = dt.Rows[i].ItemArray[8].ToString(),
Data = dt.Rows[i].ItemArray[9].ToString()
};
_unitOfWork.Session.Save(item);
}
_unitOfWork.Commit();
}
catch (Exception ex)
{
throw;
}
}
For example, my file is:
2506737718,WFP,t,t,13830811,241641941709,9190517705,,F,,,14160
2506156741,WFP,t,t,13840915,409742096044,9195690240,,F,,,14160
2506158590,WFP,t,t,13840318,038664583449,9182550182,,F,,,14160
In the DB it is:
2506737718,WFP,t,t,13830811,241641941709,9190517705,,F,,,14160
2506158590,WFP,t,t,13840318,038664583449,9182550182,,F,,,14160
2506156741,WFP,t,t,13840915,409742096044,9195690240,,F,,,14160
The primary key in the DB is Record_id, with values 2506737718, 2506158590, 2506156741.
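Note: this is expected behavior. Rows in a SQL table have no inherent order, and a SELECT without an ORDER BY clause may return rows in whatever order the engine finds convenient, so the insert order is never guaranteed. If you need the rows back in insertion order, order the query by the key you generated, e.g. ORDER BY Record_id.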
How do I compare data in two Excel sheets in the same Excel workbook, and display the difference in a new Excel sheet, using C#? I tried this:
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.IO;
public class ExcelHandler
{
#region Properties
public string Directory { get; set; }
public string FirstFile { get; set; }
public string FirstFileSheetName { get; set; }
public string SecondFile { get; set; }
public string SecondFileSheetName { get; set; }
public DataTable ReturnDataSet { get; set; }
public bool Excel2007 { get; set; }
public bool UseHeader { get; set; }
#endregion
#region Constructor
public ExcelHandler() { }
public ExcelHandler(string Dir, string File1, string File1SheetName, string File2, string File2SheetName)
{
this.Directory = Dir;
this.FirstFile = File1;
this.SecondFile = File2;
this.FirstFileSheetName = File1SheetName;
this.SecondFileSheetName = File2SheetName;
}
#endregion
#region Match Files
public DataTable CheckExcelFiles()
{
DataTable dtRet = new DataTable();
//Read the first excel
try
{
//Read the excel
DataTable dt1 = GetDataTableFromExcel(this.Directory, this.FirstFile, this.FirstFileSheetName);
DataTable dt2 = GetDataTableFromExcel(this.Directory, this.SecondFile, this.SecondFileSheetName);
//Compare two
dtRet = getDifferentRecords(dt1, dt2);
}
catch (Exception ex) { }
return dtRet;
}
//Overload method to write to csv
public void CheckExcelFiles(string strFilePath)
{
DataTable dtRet = new DataTable();
//Read the first excel
try
{
//Read the excel
DataTable dt1 = GetDataTableFromExcel(this.Directory, this.FirstFile, this.FirstFileSheetName);
DataTable dt2 = GetDataTableFromExcel(this.Directory, this.SecondFile, this.SecondFileSheetName);
//Compare two
dtRet = getDifferentRecords(dt1, dt2);
ExportDataTableToExcel(dtRet, strFilePath);
}
catch (Exception ex) { }
}
//Get Datatable reading Excel
private DataTable GetDataTableFromExcel(string strDir, string strFileName, string strSheetName)
{
var fileName = string.Format("{0}\\" + strFileName, strDir);
string connectionString;
//if (Excel2007)
//read a 2007 file
connectionString = string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 8.0;HDR=" + (UseHeader == true ? "YES" : "NO") + ";\"", fileName);
//else
//read a 97-2003 file
//connectionString = string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data Source={0};Extended Properties=\"Excel 8.0;HDR=" + (UseHeader == true ? "YES" : "NO") + ";\"", fileName);
//var connectionString = string.Format("Provider=Microsoft.Jet.OLEDB.4.0; data source={0}; Extended Properties=Excel 8.0;", fileName);
var adapter = new OleDbDataAdapter("SELECT * FROM [" + strSheetName + "$]", connectionString);
var ds = new DataSet();
adapter.Fill(ds, fileName + strSheetName);
return ds.Tables[fileName + strSheetName];
}
//Alternative comparison helper (not used by CheckExcelFiles above); note it requires both tables to have a "PK" column
private DataTable CompareDataTable(DataTable A, DataTable B)
{
A.PrimaryKey = new DataColumn[] { A.Columns["PK"] };
B.PrimaryKey = new DataColumn[] { B.Columns["PK"] };
A.Merge(B, true); // this will add to A any records that are in B but not A
A.AcceptChanges();
return A.GetChanges(DataRowState.Added); // returns records originally only in B
}
//Provided here http://social.msdn.microsoft.com/Forums/en-US/csharpgeneral/thread/23703a85-20c7-4759-806a-fabf4e9f5be6/
//Provided by Guo Surfer
#region Compare two DataTables and return a DataTable with DifferentRecords
/// <summary>
/// Compare two DataTables and return a DataTable with DifferentRecords
/// </summary>
/// <param name="FirstDataTable">FirstDataTable</param>
/// <param name="SecondDataTable">SecondDataTable</param>
/// <returns>DifferentRecords</returns>
public DataTable getDifferentRecords(DataTable FirstDataTable, DataTable SecondDataTable)
{
//Create Empty Table
DataTable ResultDataTable = new DataTable("ResultDataTable");
//use a Dataset to make use of a DataRelation object
using (DataSet ds = new DataSet())
{
//Add tables
ds.Tables.AddRange(new DataTable[] { FirstDataTable.Copy(), SecondDataTable.Copy() });
//Get Columns for DataRelation
DataColumn[] firstColumns = new DataColumn[ds.Tables[0].Columns.Count];
for (int i = 0; i < firstColumns.Length; i++)
{
firstColumns[i] = ds.Tables[0].Columns[i];
}
DataColumn[] secondColumns = new DataColumn[ds.Tables[1].Columns.Count];
for (int i = 0; i < secondColumns.Length; i++)
{
secondColumns[i] = ds.Tables[1].Columns[i];
}
//Create DataRelation
DataRelation r1 = new DataRelation(string.Empty, firstColumns, secondColumns, false);
ds.Relations.Add(r1);
DataRelation r2 = new DataRelation(string.Empty, secondColumns, firstColumns, false);
ds.Relations.Add(r2);
//Create columns for return table
for (int i = 0; i < FirstDataTable.Columns.Count; i++)
{
ResultDataTable.Columns.Add(FirstDataTable.Columns[i].ColumnName, FirstDataTable.Columns[i].DataType);
}
//If FirstDataTable Row not in SecondDataTable, Add to ResultDataTable.
ResultDataTable.BeginLoadData();
foreach (DataRow parentrow in ds.Tables[0].Rows)
{
DataRow[] childrows = parentrow.GetChildRows(r1);
if (childrows == null || childrows.Length == 0)
ResultDataTable.LoadDataRow(parentrow.ItemArray, true);
}
//If SecondDataTable Row not in FirstDataTable, Add to ResultDataTable.
foreach (DataRow parentrow in ds.Tables[1].Rows)
{
DataRow[] childrows = parentrow.GetChildRows(r2);
if (childrows == null || childrows.Length == 0)
ResultDataTable.LoadDataRow(parentrow.ItemArray, true);
}
ResultDataTable.EndLoadData();
}
return ResultDataTable;
}
#endregion
private void ExportDataTableToExcel(DataTable dt, string strFilePath)
{
// Create the CSV file to which grid data will be exported.
StreamWriter sw = new StreamWriter(strFilePath, false);
// First we will write the headers.
//DataTable dt = m_dsProducts.Tables[0];
int iColCount = dt.Columns.Count;
for (int i = 0; i < iColCount; i++)
{
sw.Write(dt.Columns[i]);
if (i < iColCount - 1)
{
sw.Write(",");
}
}
sw.Write(sw.NewLine);
// Now write all the rows.
foreach (DataRow dr in dt.Rows)
{
for (int i = 0; i < iColCount; i++)
{
if (!Convert.IsDBNull(dr[i]))
{
sw.Write(dr[i].ToString());
}
if (i < iColCount - 1)
{
sw.Write(",");
}
}
sw.Write(sw.NewLine);
}
sw.Close();
}
#endregion
}
private void button1_Click(object sender, EventArgs e)
{
//Declare the class and set the excel file dir/path and sheet name
ExcelHandler eh = new ExcelHandler() { Directory = "c:\\temp", FirstFile = "Book1.xlsx", FirstFileSheetName = "Sheet1", SecondFile = "Book2.xlsx", SecondFileSheetName = "Sheet1" };
//Get the datatable
DataTable dt = eh.CheckExcelFiles();
//Or write to CSV to open with Excel
eh.CheckExcelFiles("c:\\temp\\test123.csv");
}
This works for me, except that it is merging the content of the two worksheets into the new sheet test123.csv; I want to show only the difference. I would appreciate it if anyone could assist. Thanks in advance.
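A note on why the output looks like a merge: getDifferentRecords matches rows across the two tables on every column, so any row whose cells differ in even one column (extra whitespace, a number stored as text, etc.) is treated as "not found" on the other side and lands in the result twice, once per source table. If most rows differ slightly, the result resembles a union of both sheets. Normalizing the cell values (trimming, consistent types) before the comparison should shrink the output to the genuine differences.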
I need to fill two DataTables from two Excel files. Each file can be a comma-separated values (.csv) file or an Excel (.xlsx) document. I use a function to convert my .csv to a DataTable:
public DataTable GetDataTableFromCsv(string path)
{
DataTable dataTable = new DataTable();
string[] csvRows = System.IO.File.ReadAllLines(path);
string[] headers = csvRows[0].Split(',');
// Adding column names
foreach (var item in headers)
dataTable.Columns.Add(new DataColumn(item));
// Start at 1 to skip the header row
for (int i = 1; i < csvRows.Length; i++)
{
string[] fields = csvRows[i].Split(',');
DataRow row = dataTable.NewRow();
row.ItemArray = fields;
dataTable.Rows.Add(row);
}
return dataTable;
}
The problem occurs when I try to compare my two DataTables with:
ieDiff = dt1.AsEnumerable().Except(dt2.AsEnumerable(), DataRowComparer.Default);
My first DataTable, obtained from the .csv file, contains the correct values but with some extra quotes; my second DataTable, obtained from the .xlsx file, contains the correct values.
ex :
dt1.Rows[10] (DataTable from the .csv) will contain:
Item1
Item2
"."
"Item4"
dt2.Rows[10] (DataTable from the .xlsx) will contain:
Item1
Item2
.
Item4
I know I can trim all of these, but surely there is a better way to do this?
The easiest answer is the one you mentioned in the question, which is to trim the quote character.
My test code:
var test = new[] { "Item1", "Item2", "\".\"", "\"Item4\"" };
foreach (var s in test)
{
textBox1.AppendTextAddNewLine(s.Trim('"'));
}
Resulting Output:
Item1
Item2
.
Item4
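To apply the same idea to the whole table rather than a test array, a sketch that strips surrounding quotes from every cell (assuming all columns hold strings, as they do when loaded by the GetDataTableFromCsv method above):
foreach (DataRow row in dataTable.Rows)
{
    for (int i = 0; i < dataTable.Columns.Count; i++)
    {
        // Trim the quote characters left behind by the naive Split(',')
        if (row[i] is string s)
            row[i] = s.Trim('"');
    }
}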
I have written five methods below that will turn a CSV file into a DataTable.
They have been designed to take into account optional quote marks (e.g. " symbols) and to be as versatile as possible without using other libraries:
public static DataTable GetDataTableFromCsvFile(string filePath, bool isHeadings)
{
DataTable MethodResult = null;
try
{
using (TextFieldParser TextFieldParser = new TextFieldParser(filePath))
{
if (isHeadings)
{
MethodResult = GetDataTableFromTextFieldParser(TextFieldParser);
}
else
{
MethodResult = GetDataTableFromTextFieldParserNoHeadings(TextFieldParser);
}
}
}
catch (Exception ex)
{
ex.HandleException();
}
return MethodResult;
}
public static DataTable GetDataTableFromCsvString(string csvBody, bool isHeadings)
{
DataTable MethodResult = null;
try
{
MemoryStream MemoryStream = new MemoryStream();
StreamWriter StreamWriter = new StreamWriter(MemoryStream);
StreamWriter.Write(csvBody);
StreamWriter.Flush();
MemoryStream.Position = 0;
using (TextFieldParser TextFieldParser = new TextFieldParser(MemoryStream))
{
if (isHeadings)
{
MethodResult = GetDataTableFromTextFieldParser(TextFieldParser);
}
else
{
MethodResult = GetDataTableFromTextFieldParserNoHeadings(TextFieldParser);
}
}
}
catch (Exception ex)
{
ex.HandleException();
}
return MethodResult;
}
public static DataTable GetDataTableFromRemoteCsv(string url, bool isHeadings)
{
DataTable MethodResult = null;
try
{
HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
StreamReader StreamReader = new StreamReader(httpWebResponse.GetResponseStream());
using (TextFieldParser TextFieldParser = new TextFieldParser(StreamReader))
{
if (isHeadings)
{
MethodResult = GetDataTableFromTextFieldParser(TextFieldParser);
}
else
{
MethodResult = GetDataTableFromTextFieldParserNoHeadings(TextFieldParser);
}
}
}
catch (Exception ex)
{
ex.HandleException();
}
return MethodResult;
}
private static DataTable GetDataTableFromTextFieldParser(TextFieldParser textFieldParser)
{
DataTable MethodResult = null;
try
{
textFieldParser.SetDelimiters(new string[] { "," });
textFieldParser.HasFieldsEnclosedInQuotes = true;
string[] ColumnFields = textFieldParser.ReadFields();
DataTable dt = new DataTable();
foreach (string ColumnField in ColumnFields)
{
DataColumn DataColumn = new DataColumn(ColumnField);
DataColumn.AllowDBNull = true;
dt.Columns.Add(DataColumn);
}
while (!textFieldParser.EndOfData)
{
string[] Fields = textFieldParser.ReadFields();
for (int i = 0; i < Fields.Length; i++)
{
if (Fields[i] == "")
{
Fields[i] = null;
}
}
dt.Rows.Add(Fields);
}
MethodResult = dt;
}
catch (Exception ex)
{
ex.HandleException();
}
return MethodResult;
}
private static DataTable GetDataTableFromTextFieldParserNoHeadings(TextFieldParser textFieldParser)
{
DataTable MethodResult = null;
try
{
textFieldParser.SetDelimiters(new string[] { "," });
textFieldParser.HasFieldsEnclosedInQuotes = true;
bool FirstPass = true;
DataTable dt = new DataTable();
while (!textFieldParser.EndOfData)
{
string[] Fields = textFieldParser.ReadFields();
if(FirstPass)
{
for (int i = 0; i < Fields.Length; i++)
{
DataColumn DataColumn = new DataColumn("Column " + i);
DataColumn.AllowDBNull = true;
dt.Columns.Add(DataColumn);
}
FirstPass = false;
}
for (int i = 0; i < Fields.Length; i++)
{
if (Fields[i] == "")
{
Fields[i] = null;
}
}
dt.Rows.Add(Fields);
}
MethodResult = dt;
}
catch (Exception ex)
{
ex.HandleException();
}
return MethodResult;
}
If, like me, you're saving from reporting services then you should use it like this:
Warning[] warnings;
string[] streamids;
string mimeType;
string encoding;
string filenameExtension;
byte[] bytes = rvMain.ServerReport.Render("csv", null, out mimeType, out encoding, out filenameExtension, out streamids, out warnings);
string CsvBody = System.Text.Encoding.UTF8.GetString(bytes);
DataTable dt = GetDataTableFromCsvString(CsvBody,true);
Otherwise, all you need do is:
bool IsHeadings = true; //Does the data include a heading row?
DataTable dt = GetDataTableFromCsvString(CsvBody, IsHeadings);
Or, to use it directly from a CSV file:
bool IsHeadings = true; //Does the data include a heading row?
DataTable dt = GetDataTableFromCsvFile(FilePath, IsHeadings);
Or, to use a CSV file that is stored remotely:
bool IsHeadings = true; //Does the data include a heading row?
DataTable dt = GetDataTableFromRemoteCsv(Url, IsHeadings);