C# ExcelDataReader and OpenXML both skipping empty columns - c#

I'm trying to read data from an Excel file into a datatable. Some of the cells in the Excel may be empty.
Both the libraries are skipping empty cells and hence the order in datatable is wrong. For example, if the Excel is like this:
Name Contact Gender
A M
B C1 F
C M
The result in datatable is:
Name => A
Contact => M
whereas it should be:
Name => A
Contact =>
Gender => M
Here is the code:
ExcelDataReader
// Open the workbook and read its first worksheet into a DataTable.
using (var stream = new FileStream(filePath, FileMode.Open))
{
    // ".xls" goes through the binary reader; every other extension uses the OpenXml reader.
    reader = extension == ".xls"
        ? ExcelReaderFactory.CreateBinaryReader(stream)
        : ExcelReaderFactory.CreateOpenXmlReader(stream);

    DataSet ds = reader.AsDataSet();
    reader.Close();

    // Only the first table is returned; when there is none, execution falls through.
    bool hasTables = ds != null && ds.Tables.Count > 0;
    if (hasTables)
    {
        return ds.Tables[0];
    }
}
OpenXML
/// <summary>
/// Returns the text of a worksheet cell, resolving shared-string references.
/// </summary>
/// <param name="document">The open spreadsheet document that owns <paramref name="cell"/>.</param>
/// <param name="cell">The cell to read; may be null or have no stored value.</param>
/// <returns>
/// The shared-string text when the cell's data type is SharedString, the raw
/// cell value otherwise, or an empty string when the cell has no value.
/// </returns>
public static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
    // A cell without a stored value has a null CellValue; dereferencing it
    // used to throw a NullReferenceException.
    if (cell == null || cell.CellValue == null)
    {
        return string.Empty;
    }

    string value = cell.CellValue.InnerXml;
    if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
    {
        // For shared strings the stored value is an index into the shared string table.
        SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
        return stringTablePart.SharedStringTable.ChildElements[int.Parse(value)].InnerText;
    }

    return value;
}
I found a few suggested fixes, but none of them worked for me.
A sample Excel is:

Related

Converting Excel to CSV with C# getting extra , on each row

I am converting an Excel file to a CSV in an Azure WebJob (keeping the file in blob storage during the process) using C#, but I am getting an extra comma at the end of each row in my CSV file.
Example:
1,Test,Doe,
2,Test,John,
Here is my code for producing the csv:
/// <summary>
/// Converts every worksheet of the supplied Excel blobs into CSV blobs.
/// </summary>
public static class ExcelToCSVConvertor
{
    /// <summary>
    /// Produces one CSV blob per worksheet of each input workbook.
    /// </summary>
    /// <param name="inputs">Workbooks to convert; each item supplies the content and the blob name.</param>
    /// <returns>
    /// One <c>BlobInput</c> per sheet, named "{name before the first dot}_{sheet name}.csv",
    /// with UTF-8 encoded CSV content.
    /// </returns>
    /// <remarks>
    /// Exceptions are no longer caught and rethrown with <c>throw Ex;</c>, which discarded
    /// the original stack trace; they now propagate unchanged.
    /// </remarks>
    public static List<BlobInput> Convert(List<BlobOutput> inputs)
    {
        var dataForBlobInput = new List<BlobInput>();

        foreach (BlobOutput item in inputs)
        {
            using (SpreadsheetDocument document = SpreadsheetDocument.Open(item.BlobContent, false))
            {
                // The shared string table is workbook-wide, so load it once per document.
                // It can be missing entirely, in which case First() used to throw.
                SharedStringTablePart sharedStringPart =
                    document.WorkbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
                SharedStringItem[] sharedStrings =
                    sharedStringPart != null && sharedStringPart.SharedStringTable != null
                        ? sharedStringPart.SharedStringTable.Elements<SharedStringItem>().ToArray()
                        : new SharedStringItem[0];

                // Name before the first dot; a name without any dot is used as-is
                // (Substring(0, -1) used to throw for such names).
                string blobName = item.BlobName.ToString();
                int dotIndex = blobName.IndexOf('.');
                string fileNameWithoutExtn = dotIndex >= 0 ? blobName.Substring(0, dotIndex) : blobName;

                foreach (Sheet sheet in document.WorkbookPart.Workbook.Descendants<Sheet>())
                {
                    var worksheetPart = (WorksheetPart)document.WorkbookPart.GetPartById(sheet.Id);
                    Worksheet worksheet = worksheetPart.Worksheet;
                    var stringBuilder = new StringBuilder();

                    foreach (var row in worksheet.Descendants<Row>())
                    {
                        foreach (Cell cell in row)
                        {
                            string value = string.Empty;
                            if (cell.CellValue != null)
                            {
                                // Shared-string cells store an index into the shared string table.
                                if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
                                {
                                    value = sharedStrings[int.Parse(cell.CellValue.Text)].InnerText;
                                }
                                else
                                {
                                    value = cell.CellValue.Text;
                                }
                            }

                            // NOTE: a comma is appended after every cell, including the last
                            // one, so each CSV row ends with a trailing comma.
                            stringBuilder.Append(value.Trim()).Append(',');
                        }

                        stringBuilder.Append("\n");
                    }

                    byte[] data = Encoding.UTF8.GetBytes(stringBuilder.ToString().Trim());
                    string newFilename = $"{fileNameWithoutExtn}_{sheet.Name}.csv";
                    dataForBlobInput.Add(new BlobInput { BlobName = newFilename, BlobContent = data });
                }
            }
        }

        return dataForBlobInput;
    }
}
This line is adding comma after EVERY single value
stringBuilder.Append(string.Format("{0},", Value.Trim()));
1,
Test,
Doe,
2,
Test,
John,
You need to exclude the comma on last value in row (last iteration).
On last iteration foreach (Cell _Cell in row) stringBuilder.Append should be
stringBuilder.Append(string.Format("{0}", Value.Trim()));

Want to fill data from array of string to datatable in c#

/// <summary>
/// Decompresses the gzip file at <paramref name="filePath"/> and splits its
/// ASCII text into lines.
/// </summary>
/// <param name="filePath">Path of the gzip-compressed file to read.</param>
/// <param name="CompanyName">Not used by the code shown here.</param>
public void DecryptFile(string filePath, string CompanyName)
{
    using (var fileStream = new FileStream(filePath, FileMode.Open))
    using (var gzStream = new GZipStream(fileStream, CompressionMode.Decompress))
    using (var outputStream = new MemoryStream())
    {
        // Inflate the whole file into memory before decoding it.
        gzStream.CopyTo(outputStream);
        byte[] outputBytes = outputStream.ToArray();
        // DeserailizeByteArrayToDatatable(outputBytes, CompanyName);

        string FileContents = Encoding.ASCII.GetString(outputBytes);
        DataTable dt = new DataTable();

        // One array entry per line of the file.
        string[] arrayList = FileContents.Split("\n");
    }
}
I have decrypted contents of file and now I want to fill the data to DataTable. The format of file contents is:
serial_number,peak_period_start_time,stop_time
15AA01AF361903PC,1602619200,1602615600
15AA01AF361902QB,1602619200,1602615600
15AA01AF361906YL,1602619200,1602630000
09AA01AF32190YXP,1602619200,1602630000
So I want to use the first line as the columns of the DataTable and all the remaining lines as rows. Additionally, I want to convert the start_time and stop_time values to a proper date/time format.
I would suggest you to first create a method that will accept a DataTable and an array of columns and will add columns to the DataTable.
/// <summary>
/// Adds one column per entry of <paramref name="columns"/> to the table.
/// </summary>
/// <param name="dt">Table to extend; a new table is created when this is null.</param>
/// <param name="columns">Column names; each one is trimmed before it is added.</param>
/// <returns>The table that received the columns.</returns>
public DataTable AddColumns(DataTable dt, string[] columns)
{
    DataTable table = dt ?? new DataTable();

    for (int index = 0; index < columns.Length; index++)
    {
        string columnName = columns[index].Trim();
        table.Columns.Add(columnName);
    }

    return table;
}
Then, Create a method that will populate data into that DataTable:
/// <summary>
/// Appends one row per non-blank line of comma-separated values to the table.
/// </summary>
/// <param name="dt">Table whose columns have already been created.</param>
/// <param name="dataLines">Data lines; null or empty leaves the table untouched.</param>
/// <returns>The same table instance, with the new rows added.</returns>
public DataTable PopulateData(DataTable dt, string[] dataLines)
{
    if (dataLines == null || dataLines.Length == 0) return dt;

    foreach (string line in dataLines)
    {
        // Splitting file contents on "\n" leaves a blank entry after the final
        // newline; skip such lines instead of adding an empty row.
        if (string.IsNullOrWhiteSpace(line)) continue;

        // The original declared "splittedLine" but read "splittedLines",
        // which did not compile.
        var splittedLine = line.Split(',');
        var row = dt.NewRow();
        for (int i = 0; i < splittedLine.Length; i++)
        {
            // Trim also strips a trailing '\r' from Windows line endings.
            row[i] = splittedLine[i].Trim();
        }

        dt.Rows.Add(row);
    }

    return dt;
}
Call the methods above in your DecryptFile method, like below:
/// <summary>
/// Decompresses the gzip file, then builds a DataTable from its text: the first
/// line supplies the column names and the remaining lines supply the rows.
/// </summary>
/// <param name="filePath">Path of the gzip-compressed file to read.</param>
/// <param name="CompanyName">Not used by the code shown here.</param>
public void DecryptFile(string filePath, string CompanyName)
{
    using (var fileStream = new FileStream(filePath, FileMode.Open))
    using (var gzStream = new GZipStream(fileStream, CompressionMode.Decompress))
    using (var outputStream = new MemoryStream())
    {
        gzStream.CopyTo(outputStream);
        byte[] outputBytes = outputStream.ToArray();
        // DeserailizeByteArrayToDatatable(outputBytes, CompanyName);

        string FileContents = Encoding.ASCII.GetString(outputBytes);
        DataTable dt = new DataTable();
        string[] arrayList = FileContents.Split("\n");

        // Usage
        if (arrayList.Length > 0)
        {
            // Header line -> columns.
            string[] columns = arrayList[0].Split(',');
            DataTable withColumns = AddColumns(dt, columns);

            // Everything after the header line -> rows.
            string[] data = arrayList.AsEnumerable().Skip(1).ToArray();
            dt = PopulateData(withColumns, data);
        }
    }
}
Note: I see that your file has 4 columns in the header row but supplies only 3 values per row, which is not correct. You must supply the same number of values as there are columns; otherwise, you might want to handle the row creation differently.
The general concept I would follow would be to:
Read the first row and determine which and how many columns I need to create
For each column use f.Columns.Add() to add the column to the DataTable.
For each row following do f.Rows.Add() filling in the data.
Something like:
/// <summary>
/// Decompresses the gzip file and loads its comma-separated text into a
/// DataTable: the first non-blank line defines the columns and every later
/// non-blank line becomes a row.
/// </summary>
/// <param name="filePath">Path of the gzip-compressed file to read.</param>
/// <param name="CompanyName">Not used by the code shown here.</param>
public void DecryptFile(string filePath, string CompanyName)
{
    using (var fileStream = new FileStream(filePath, FileMode.Open))
    using (var gzStream = new GZipStream(fileStream, CompressionMode.Decompress))
    using (var outputStream = new MemoryStream())
    {
        gzStream.CopyTo(outputStream);
        byte[] outputBytes = outputStream.ToArray();
        string FileContents = Encoding.ASCII.GetString(outputBytes);

        DataTable newTable = new DataTable();
        bool isHeader = true;

        foreach (string rawLine in FileContents.Split('\n'))
        {
            // Drop the '\r' left by Windows line endings and skip blank lines
            // (such as the entry after the final newline).
            string line = rawLine.TrimEnd('\r');
            if (line.Length == 0) continue;

            // Assuming comma separated columns
            string[] columns = line.Split(',');

            if (isHeader)
            {
                foreach (string columnValue in columns)
                {
                    newTable.Columns.Add(new DataColumn(columnValue));
                }

                isHeader = false;
                continue;
            }

            DataRow row = newTable.NewRow();

            // The original never advanced columnIndex, so every value
            // overwrote column 0.
            for (int columnIndex = 0; columnIndex < columns.Length; columnIndex++)
            {
                row[columnIndex] = columns[columnIndex];
            }

            // The original never attached the row to the table.
            newTable.Rows.Add(row);
        }
    }
}
(have not tested the code)
You can create data table by using below function. I am not sure what format you want for start time and end time, so I have mentioned it in comments where you can do that.
/// <summary>
/// Builds a DataTable from comma-separated lines: the first line supplies the
/// column names and each following non-blank line supplies one row.
/// </summary>
/// <param name="stringArray">Header line followed by data lines; null or empty yields an empty table.</param>
/// <returns>
/// A table with one string column per header entry. Rows shorter than the
/// header leave the remaining cells unset; values beyond the last column are ignored.
/// </returns>
public static DataTable ArrayOfStringToDataTable(string[] stringArray)
{
    DataTable dataTable = new DataTable();
    if (stringArray == null || stringArray.Length == 0)
    {
        return dataTable;
    }

    foreach (var header in stringArray[0].Split(','))
    {
        // Trim removes the '\r' that Windows line endings leave on the last header.
        dataTable.Columns.Add(header.Trim(), typeof(string));
    }

    for (var i = 1; i < stringArray.Length; i++)
    {
        // Splitting on '\n' usually leaves a blank last entry; the hard-coded
        // rows[1] / rows[2] indexing used to throw on it.
        if (string.IsNullOrWhiteSpace(stringArray[i]))
        {
            continue;
        }

        var cells = stringArray[i].Split(',');
        var dataRow = dataTable.NewRow();

        // Fill as many cells as both the line and the header provide, rather
        // than assuming exactly three columns.
        var cellCount = cells.Length < dataTable.Columns.Count ? cells.Length : dataTable.Columns.Count;
        for (var c = 0; c < cellCount; c++)
        {
            // Do any required formatting here (for example converting the
            // start/stop time columns) before assigning the value.
            dataRow[c] = cells[c].Trim();
        }

        dataTable.Rows.Add(dataRow);
    }

    return dataTable;
}

C# ExcelDataReader Error - 'Invalid file signature' for XLSB format

I am receiving an 'Invalid file signature' error when I try to read an xlsb file using the code below.
If I use CreateReader instead, I receive a 'Detected ZIP file, but not a valid OpenXml file' error. I have also tried the other options listed below, but nothing works for me.
Can somebody help me read an xlsb file?
Stream stream = new MemoryStream(srcContent);
/// <summary>
/// Reads the stream with ExcelDataReader's binary reader and returns the
/// resulting data set with every table name trimmed.
/// </summary>
/// <param name="stream">Stream containing the workbook to read.</param>
/// <returns>The data set produced by the reader.</returns>
public static DataSet GetXLSBData(Stream stream)
{
    DataSet workbook;
    using (var excelReader = ExcelReaderFactory.CreateBinaryReader(stream))
    {
        workbook = excelReader.AsDataSet();
    }

    // Strip leading/trailing whitespace from each table name.
    for (int index = 0; index < workbook.Tables.Count; index++)
    {
        DataTable sheet = workbook.Tables[index];
        sheet.TableName = sheet.TableName.Trim();
    }

    return workbook;
}
Other options tried:
var reader = ExcelReaderFactory.CreateOpenXmlReader(stream)
var reader = ExcelReaderFactory.CreateCsvReader(stream)
var reader = ExcelReaderFactory.CreateReader(stream)
My proposal
c# code :
// Read the sheet row by row with SpreadSheetTasks.
using (var excelFile = new XlsxOrXlsbReadOrEdit())
{
    excelFile.Open("file.xlsx");
    excelFile.ActualSheetName = "sheet1";

    // The buffer is allocated once, after the first Read(), and reused for every row.
    object[] row = null;
    while (excelFile.Read())
    {
        row = row ?? new object[excelFile.FieldCount];
        excelFile.GetValues(row);
    }
}
Disclaimer: I am the creator of SpreadSheetTasks.
Links
https://www.nuget.org/packages/SpreadSheetTasks/

Update an excel sheet uploaded in ASP.NET MVC

I have a function where an excel sheet is uploaded and cells are read using column names which match the item in a list of string in the backend.
I need to alter the column names ( remove spaces and convert to lower()) so that it matches the items when I use the list of string as column names to read the excel sheet.
I am using IExcelDataReader Library from nuget package .
I read all the column names using this package and looped through them trying to alter them but I couldn't do so as it was a read only mode.
What can I do to alter the column names of an uploaded Excel sheet?
/// <summary>
/// Reads the first sheet of an uploaded .xls/.xlsx file and, for each row,
/// collects the values of the columns named in <c>listOfStrings</c>.
/// </summary>
/// <param name="upload">The uploaded file; ignored when null or empty.</param>
// NOTE(review): no return statement is visible in this snippet.
public ActionResult Import(HttpPostedFileBase upload)
{
    if (upload != null && upload.ContentLength > 0)
    {
        Stream stream = upload.InputStream;
        IExcelDataReader reader = null;

        bool isExcelFile = upload.FileName.EndsWith(".xls") || upload.FileName.EndsWith(".xlsx");
        if (isExcelFile)
        {
            var file = upload.FileName;
            reader = ExcelDataReader.ExcelReaderFactory.CreateReader(stream);

            // Treat the first row of every sheet as the header row.
            var conf = new ExcelDataSetConfiguration();
            conf.ConfigureDataTable = _ => new ExcelDataTableConfiguration
            {
                UseHeaderRow = true
            };

            var dataSet = reader.AsDataSet(conf);
            var Model = dataSet.Tables[0].Rows;
            var meta = listOfStrings; // Column names to be read in a list

            foreach (System.Data.DataRow row in Model)
            {
                var dict = new Dictionary<string, string>();
                foreach (var item in meta)
                {
                    string columnName = item.ToLower().Trim();
                    dict.Add(item, row[columnName].ToString());
                    // During this stage I need the column names to exactly match my list of string
                    // What can I do to Convert my strings to lower case and trim them?
                }
            }
        }
    }
}

Import *.xlsx data via ExcelDataReader with 0 number of records affected

I use the ExcelDataReader and ExcelDataReader.DataSet libraries to read over 1700 rows from an xlsx file. In the end, no records are read. However, if we restrict the number of rows in the file to 9, all of the rows are read correctly. Here is the code:
/// <summary>
/// Parses an uploaded import file into records of type <typeparamref name="T"/>.
/// </summary>
/// <param name="fileName">Original file name; only its extension is used, to pick the parser.</param>
/// <param name="file">Raw file content.</param>
/// <returns>
/// The parsed records for .xls/.xlsx/.ods files, or null for .csv, which this
/// method does not parse.
/// </returns>
/// <exception cref="ImportException">The extension is not a supported one.</exception>
private IEnumerable<T> ParseImportSourceFile<T>(string fileName, byte[] file) where T : CsvClassMap
{
    // Computed once; the original evaluated the same expression twice.
    var extensionEnd = Path.GetExtension(fileName).ToLowerInvariant();
    switch (extensionEnd)
    {
        case ".csv":
            return null;
        case ".xls":
        case ".xlsx":
        case ".ods":
            return ReadSpreadsheetRecords<T>(extensionEnd, file);
        default:
            throw new ImportException("Template is not valid.");
    }
}

/// <summary>
/// Reads the first sheet of a spreadsheet and maps each row onto a new <typeparamref name="T"/>.
/// </summary>
private List<T> ReadSpreadsheetRecords<T>(string extension, byte[] file) where T : CsvClassMap
{
    var records = new List<T>();

    // Exactly one reader is created per file. The original always called
    // CreateReader and then, for .xlsx, created a second reader over the same
    // stream without disposing the first one.
    using (var memoryStream = new MemoryStream(file))
    using (var reader = extension == ".xlsx"
        ? ExcelReaderFactory.CreateOpenXmlReader(memoryStream)
        : ExcelReaderFactory.CreateReader(memoryStream))
    {
        var result = reader.AsDataSet(new ExcelDataSetConfiguration()
        {
            UseColumnDataType = true,
            ConfigureDataTable = (tableReader) => new ExcelDataTableConfiguration()
            {
                EmptyColumnNamePrefix = "Column",
                UseHeaderRow = true,
            }
        });

        if (result.Tables.Count == 0)
        {
            return records;
        }

        var tbl = result.Tables[0];
        var properties = typeof(T).GetProperties(BindingFlags.Public | BindingFlags.Instance);

        foreach (DataRow row in tbl.Rows)
        {
            // One row, one record.
            var record = Activator.CreateInstance<T>();

            foreach (DataColumn col in tbl.Columns)
            {
                // A column matches a property by property name or by its [Display(Name = ...)].
                var prop = properties.FirstOrDefault(x =>
                {
                    if (x.Name == col.ColumnName)
                    {
                        return true;
                    }

                    var display = x.GetCustomAttribute<DisplayAttribute>();
                    return display != null && display.Name == col.ColumnName;
                });

                if (prop == null || !prop.CanWrite)
                {
                    continue;
                }

                var sourceValue = row[col.ColumnName];
                if (!(sourceValue is DBNull))
                {
                    prop.SetValue(record, Parse(prop.PropertyType, sourceValue.ToString()));
                }
            }

            records.Add(record);
        }
    }

    return records;
}
I use the CreateOpenXmlReader method for xlsx files, as was suggested on
Stack Overflow.
tbl.Rows.Count is zero, even though the xlsx file contains more than 1700 rows. Please help.

Categories

Resources