I use the below code to read data from an excel file into a DataTable object for further use. Since it processes from 100k to 500k entries, the reading can get a bit slow. Is there something I could change in my code in order to optimize the process ? Code is below.
public static DataTable ReadAsDataTable(string filePath)
{
DataTable dataTable = new DataTable();
using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(filePath, false))
{
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
foreach (Cell cell in rows.ElementAt(0))
{
dataTable.Columns.Add(GetCellValue(spreadSheetDocument, cell));
}
foreach (Row row in rows)
{
DataRow dataRow = dataTable.NewRow();
for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
dataRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
}
dataTable.Rows.Add(dataRow);
}
}
dataTable.Rows.RemoveAt(0);
return dataTable;
}
private static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
string value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
else
{
return value;
}
}
I'm not sure what's the compiler behavior on this or the performance characteristics of that API, but would it help if you call row.Descendants<Cell>() only once? It seems like something the compiler could optimize, but there may be side effects involved, so it doesn't do anything.
foreach (Row row in rows)
{
var cells = row.Descendants<Cell>().ToArray();
DataRow dataRow = dataTable.NewRow();
for (int i = 0; i < cells.Length; i++)
{
dataRow[i] = GetCellValue(spreadSheetDocument, cells[i]);
}
dataTable.Rows.Add(dataRow);
}
Related
What I want to achieve is illustrated below:
The result and issue I've been having are illustrated below:
This is the result file my code has generated and there supposed to have expected content.
Window prompt after the 'Yes' button has clicked.
My running code is given below:
Main method:
public static void Main(string[] args)
{
WriteExcelService writeExcelService = new WriteExcelService();
Dictionary<string, List<string>> contentList = new Dictionary<string, List<string>>
{
{ "en-US",new List<string> (new string[] { "Dummy text 01","Dummy text 02"}) },
{ "es-ES",new List<string> (new string[] { "Texto ficticio 01", "Texto ficticio 02"}) }
};
string inputFile = #"C:\{username}\Desktop\Valentines_Day.xlsx";
string sheetName = "Copy";
writeExcelService.WriteValueToCell(inputFile, sheetName, contentList);
}
WriteValueToCell method:
char columnName = 'I';
uint rowNumber = 1;
foreach (var keys in contentList.Keys)
{
foreach (var value in contentList.Where(v => v.Key == keys).SelectMany(v => v.Value))
{
string cellAddress = String.Concat(columnName, rowNumber);
this.Write(filepath, sheetName, value, cellAddress, rowNumber);
int tempColumn = (int)columnName;
columnName = (char)++tempColumn;
}
columnName = 'I';
++rowNumber;
}
Write method:
private void Write(string filepath, string sheetName, string value, string cellAddress,uint rowNumber)
{
// Create a spreadsheet document by supplying the filepath.
// By default, AutoSave = true, Editable = true, and Type = xlsx.
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Create(filepath, SpreadsheetDocumentType.Workbook))
{
//writeExcelService.WriteValueToCell(outputFilePath, sheetName, cellAddress, value.Value);
// Add a WorkbookPart to the document.
WorkbookPart workbookpart = spreadsheetDocument.AddWorkbookPart();
workbookpart.Workbook = new Workbook();
// Add a WorksheetPart to the WorkbookPart.
WorksheetPart worksheetPart = workbookpart.AddNewPart<WorksheetPart>();
worksheetPart.Worksheet = new Worksheet(new SheetData());
// Add Sheets to the Workbook.
Sheets sheets = spreadsheetDocument.WorkbookPart.Workbook.AppendChild<Sheets>(new Sheets());
// Append a new worksheet and associate it with the workbook.
Sheet sheet = new Sheet() { Id = spreadsheetDocument.WorkbookPart.GetIdOfPart(worksheetPart), SheetId = 1, Name = sheetName };
sheets.Append(sheet);
// Get the sheetData cell table.
SheetData sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();
// Add a row to the cell table.
Row row;
row = new Row() { RowIndex = rowNumber };
sheetData.Append(row);
// In the new row, find the column location to insert a cell.
Cell refCell = null;
foreach (Cell cell in row.Elements<Cell>())
{
if (string.Compare(cell.CellReference.Value, cellAddress, true) > 0)
{
refCell = cell;
break;
}
}
// Add the cell to the cell table.
Cell newCell = new Cell() { CellReference = cellAddress };
row.InsertBefore(newCell, refCell);
// Set the cell value to be a numeric value.
newCell.CellValue = new CellValue(value);
newCell.DataType = new EnumValue<CellValues>(CellValues.Number);
}
}
My problem is:
My code executes but once the result file is opened, It prompts window as I posted above, and the file is empty.If I debug the code to insert list of contents one by one, it can be written correctly to Cells I2 or J2. Since my code creates SpreadsheetDocument for each list content, therefore I have changed the SpreadsheetDocument creation approach in the code below:
using (SpreadsheetDocument spreadsheetDocument = File.Exists(filePath) ?
SpreadsheetDocument.Open(filepath, true) :
SpreadsheetDocument.Create(filepath, SpreadsheetDocumentType.Workbook))
{
}
But I am getting exception
Only one instance of the type is allowed for this parent.
Anyone can help me on this?
Appreciate it in advance.
Strings in excel are saved under a sharedStringTable. When inserting a string it is important to add the string or reference the string from the sharedStringTable. Also, you need to provide the correct DataType for a cell. In your code, you are inserting all values as a number:
// Set the cell value to be a numeric value.
newCell.CellValue = new CellValue(value);
newCell.DataType = new EnumValue<CellValues>(CellValues.Number);
To insert a string I would recommend using the following method after you have created a new cell:
private SpreadsheetDocument _spreadSheet;
private WorksheetPart _worksheetPart;
..
..
private void UpdateCell(Cell cell, DataTypes type, string text)
{
if (type == DataTypes.String)
{
cell.DataType = CellValues.SharedString;
if (!_spreadSheet.WorkbookPart.GetPartsOfType<SharedStringTablePart>().Any())
{
_spreadSheet.WorkbookPart.AddNewPart<SharedStringTablePart>();
}
var sharedStringTablePart = _spreadSheet.WorkbookPart.GetPartsOfType<SharedStringTablePart>().First();
if (sharedStringTablePart.SharedStringTable == null)
{
sharedStringTablePart.SharedStringTable = new SharedStringTable();
}
//Iterate through shared string table to check if the value is already present.
foreach (SharedStringItem ssItem in sharedStringTablePart.SharedStringTable.Elements<SharedStringItem>())
{
if (ssItem.InnerText == text)
{
cell.CellValue = new CellValue(ssItem.ElementsBefore().Count().ToString());
return;
}
}
// The text does not exist in the part. Create the SharedStringItem.
var item = sharedStringTablePart.SharedStringTable.AppendChild(new SharedStringItem(new Text(text)));
cell.CellValue = new CellValue(item.ElementsBefore().Count().ToString());
}
else if (type == DataTypes.Number)
{
cell.CellValue = new CellValue(text);
cell.DataType = CellValues.Number;
}
else if (type == DataTypes.DateTime)
{
cell.DataType = CellValues.Number;
cell.StyleIndex = Convert.ToUInt32(_dateStyleIndex);
DateTime dateTime = DateTime.Parse(text);
double oaValue = dateTime.ToOADate();
cell.CellValue = new CellValue(oaValue.ToString(CultureInfo.InvariantCulture));
}
_worksheetPart.Worksheet.Save();
}
I have figured out the solution myself. I have passed the list of string contents and write them all to corresponding Cells then closed the SpreadSheetDocument. In this way SpreadSheetDocument can be created once. Working code is below:
public void WriteValueToCell(string filepath, string sheetName, Dictionary<string, List<string>> contentList)
{
// Create a spreadsheet document by supplying the filepath.
// By default, AutoSave = true, Editable = true, and Type = xlsx.
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Create(filepath, SpreadsheetDocumentType.Workbook, true))
{
// Add a WorkbookPart to the document.
WorkbookPart workbookpart = spreadsheetDocument.AddWorkbookPart();
workbookpart.Workbook = new Workbook();
//Add a WorkbookStylesPart to the workbookpart
WorkbookStylesPart stylesPart = workbookpart.AddNewPart<WorkbookStylesPart>();
stylesPart.Stylesheet = new Stylesheet();
// Add a WorksheetPart to the WorkbookPart.
WorksheetPart worksheetPart = workbookpart.AddNewPart<WorksheetPart>();
worksheetPart.Worksheet = new Worksheet(new SheetData());
// Add Sheets to the Workbook.
Sheets sheets = spreadsheetDocument.WorkbookPart.Workbook.AppendChild<Sheets>(new Sheets());
// Append a new worksheet and associate it with the workbook.
Sheet sheet = new Sheet() { Id = spreadsheetDocument.WorkbookPart.GetIdOfPart(worksheetPart), SheetId = 1, Name = sheetName };
sheets.Append(sheet);
// Get the sheetData cell table.
SheetData sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();
char columnName = 'I';
uint rowNumber = 1;
foreach (var keys in contentList.Keys)
{
foreach (var value in contentList.Where(v => v.Key == keys).SelectMany(v => v.Value))
{
string cellAddress = String.Concat(columnName, rowNumber);
// Add a row to the cell table.
Row row;
row = new Row() { RowIndex = rowNumber };
sheetData.Append(row);
// In the new row, find the column location to insert a cell.
Cell refCell = null;
foreach (Cell cell in row.Elements<Cell>())
{
if (string.Compare(cell.CellReference.Value, cellAddress, true) > 0)
{
refCell = cell;
break;
}
}
// Add the cell to the cell table.
Cell newCell = new Cell() { CellReference = cellAddress };
row.InsertBefore(newCell, refCell);
// Set the cell value to be a numeric value.
newCell.CellValue = new CellValue(value);
newCell.DataType = new EnumValue<CellValues>(CellValues.String);
int tempColumn = (int)columnName;
columnName = (char)++tempColumn;
}
columnName = 'I';
++rowNumber;
}
}
}
Main method:
public static void Main(string[] args)
{
WriteExcelService writeExcelService = new WriteExcelService();
Dictionary<string, List<string>> contentList = new Dictionary<string, List<string>>
{
{ "en-US",new List<string> (new string[] { "Dummy text 01","Dummy text 02"}) },
{ "es-ES",new List<string> (new string[] { "Texto ficticio 01", "Texto ficticio 02"}) }
};
string inputFile = #"C:\{username}\Desktop\Valentines_Day.xlsx";
string sheetName = "Copy";
writeExcelService.WriteValueToCell(inputFile, sheetName, contentList);
}
I have an issue at converting from Excel file to XML file. When I convert the info in table tags, it converts in <table1></table1> <table1></table1> <table1></table1>but I need to convert each column in <table1></table1><table2></table2>...
The code is the following.
private DataTable ReadExcelFile(string filename)
{
// Initialize an instance of DataTable
DataTable dt = new DataTable();
try
{
// Use SpreadSheetDocument class of Open XML SDK to open excel file
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(filename, false))
{
// Get Workbook Part of Spread Sheet Document
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
// Get all sheets in spread sheet document
// IEnumerable Interface
// Exposes an enumerator, which supports a simple iteration over a non-generic collection.
IEnumerable<Sheet> sheetcollection = spreadsheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
// Get relationship Id
string relationshipId = sheetcollection.First().Id.Value;
// Get sheet1 Part of Spread Sheet Document
WorksheetPart worksheetPart = (WorksheetPart)spreadsheetDocument.WorkbookPart.GetPartById(relationshipId);
// Get Data in Excel file
SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
IEnumerable<Row> rowcollection = sheetData.Descendants<Row>();
if (rowcollection.Count() == 0)
{
return dt;
}
// Add columns
foreach (Cell cell in rowcollection.ElementAt(0))
{
dt.Columns.Add(GetValueOfCell(spreadsheetDocument, cell));
}
// Add rows into DataTable
foreach (Row row in rowcollection)
{
DataRow temprow = dt.NewRow();
int columnIndex = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
// Get Cell Column Index
int cellColumnIndex = GetColumnIndex(GetColumnName(cell.CellReference));
if (columnIndex < cellColumnIndex)
{
do
{
temprow[columnIndex] = string.Empty;
columnIndex++;
}
while (columnIndex < cellColumnIndex);
}
temprow[columnIndex] = GetValueOfCell(spreadsheetDocument, cell);
columnIndex++;
}
// Add the row to DataTable
// the rows include header row
dt.Rows.Add(temprow);
}
}
// Here remove header row
dt.Rows.RemoveAt(0);
return dt;
}
catch (IOException ex)
{
throw new IOException(ex.Message);
}
}
private static string GetValueOfCell(SpreadsheetDocument spreadsheetdocument, Cell cell)
{
// Get value in Cell
SharedStringTablePart sharedString = spreadsheetdocument.WorkbookPart.SharedStringTablePart;
if (cell.CellValue == null)
{
return string.Empty;
}
string cellValue = cell.CellValue.InnerText;
// The condition that the Cell DataType is SharedString
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return sharedString.SharedStringTable.ChildElements[int.Parse(cellValue)].InnerText;
}
else
{
return cellValue;
}
}
private string GetColumnName(string cellReference)
{
// Create a regular expression to match the column name of cell
Regex regex = new Regex("[A-Za-z]+");
Match match = regex.Match(cellReference);
return match.Value;
}
private int GetColumnIndex(string columnName)
{
int columnIndex = 0;
int factor = 1;
// From right to left
for (int position = columnName.Length - 1; position >= 0; position--)
{
// For letters
if (Char.IsLetter(columnName[position]))
{
columnIndex += factor * ((columnName[position] - 'A') + 1) - 1;
factor *= 26;
}
}
return columnIndex;
}
public string GetXML(string filename)
{
using (DataSet ds = new DataSet())
{
ds.Tables.Add(this.ReadExcelFile(filename));
return ds.GetXml();
}
}
May somebody help me out with this issue?
Or any idea how to create a custom tags?
Thanks in advance.
I am trying to read excel using Open XML SDK in C# using Visual Studio 2013.
I followed the following links
http://msdn.microsoft.com/en-us/library/office/hh298534(v=office.15).aspx
http://msdn.microsoft.com/en-us/library/office/gg575571.aspx
The code is
using (SpreadsheetDocument document = SpreadsheetDocument.Open(excelPath, true))
{
WorkbookPart workbookPart = document.WorkbookPart;
foreach (WorksheetPart workSheetPart in workbookPart.WorksheetParts)
{
SheetData sheetData = workSheetPart.Worksheet.Elements<SheetData>().First();
IEnumerable<Row> rows = sheetData.Elements<Row>().Where(x => x.RowIndex > 1);
foreach (Row r in rows)
{
IEnumerable<string> textValues = from cell in r.Descendants<Cell>() where cell.CellValue != null select cell.CellValue.Text;
foreach (var cell in textValues)
{
string str = cell.ToString();
}
}
}
}
I tried the following code also
using (SpreadsheetDocument document = SpreadsheetDocument.Open(excelPath, true))
{
WorkbookPart workbookPart = document.WorkbookPart;
foreach (WorksheetPart workSheetPart in workbookPart.WorksheetParts)
{
SheetData sheetData = workSheetPart.Worksheet.Elements<SheetData>().First();
IEnumerable<Row> rows = sheetData.Elements<Row>().Where(x => x.RowIndex > 1);
foreach (Row r in rows)
{
List<Cell> cells = r.Descendants<Cell>().ToList();
foreach (var cell in cells)
{
if (cell != null)
{
string value = cell.CellValue.Text;
if (cell.DataType != null)
{
switch (cell.DataType.Value)
{
case CellValues.SharedString:
var stringTable = workSheetPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
if (stringTable != null)
{
value = stringTable.SharedStringTable.ElementAt(int.Parse(value)).InnerText;
}
break;
}
}
}
}
}
}
}
But both are returning only numeric values, not text. Can anyone please help on how to read excel text using Open XML SDK in C#.?
I have not used to OpenXML SDK directly, but have you tried used ClosedXML? It's a wrapper around the SDK that makes reading and writing Excel documents a breeze.
I would like to be able to open an excel document and loop through the values in the cells using Open XML in C#.
Tried using the below code, but it never gets past the Foreach (Row...). Also comments state this is for numeric values not Alpha Numeric values.
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(Filedirectory, false))
{
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
string text;
foreach (Row r in sheetData.Elements<Row>())
{
foreach (Cell c in r.Elements<Cell>())
{
text = c.CellValue.Text;
}
}
}
A coworker came up with the following solution.
FileInfo uploadedFile = new FileInfo(FileDirectory);
using (ASC.ExcelPackage.ExcelPackage xlPackage = new ASC.ExcelPackage.ExcelPackage(uploadedFile))
{
ASC.ExcelPackage.ExcelWorksheet worksheet = xlPackage.Workbook.Worksheets[1];
Int32 currentRow = 1;
while (worksheet.Cell(currentRow, 1) != null && !string.IsNullOrEmpty(worksheet.Cell(currentRow, 1).Value))
{
string value = worksheet.Cell(currentRow, 1).Value;
currentRow++;
}
}
I'm using Visual Studio 2008 and I need create a DataTable from a Excel Sheet using the Open XML SDK 2.0. I need to create it with the DataTable columns with the first row of the sheet and complete it with the rest of values.
Does anyone have a example code or a link that can help me to do this?
I think this should do what you're asking. The other function is there just to deal with if you have shared strings, which I assume you do in your column headers. Not sure this is perfect, but I hope it helps.
static void Main(string[] args)
{
DataTable dt = new DataTable();
using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(#"..\..\example.xlsx", false))
{
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
foreach (Cell cell in rows.ElementAt(0))
{
dt.Columns.Add(GetCellValue(spreadSheetDocument, cell));
}
foreach (Row row in rows) //this will also include your header row...
{
DataRow tempRow = dt.NewRow();
for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
}
dt.Rows.Add(tempRow);
}
}
dt.Rows.RemoveAt(0); //...so i'm taking it out here.
}
public static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
string value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
else
{
return value;
}
}
Hi The above code is working fine except one change
replace the below line of code
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
with
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
If you use (i-1) it will throw an exception:
specified argument was out of the range of valid values. parameter name index.
This solution works for spreadsheets without empty cells.
To handle empty cells, you will need to replace this line:
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
with something like this:
Cell cell = row.Descendants<Cell>().ElementAt(i);
int index = CellReferenceToIndex(cell);
tempRow[index] = GetCellValue(spreadSheetDocument, cell);
And add this method:
private static int CellReferenceToIndex(Cell cell)
{
int index = -1;
string reference = cell.CellReference.ToString().ToUpper();
foreach (char ch in reference)
{
if (Char.IsLetter(ch))
{
int value = (int)ch - (int)'A';
index = (index + 1) * 26 + value;
}
else
return index;
}
return index;
}
This is my complete solution where empty cell is also taken into consideration.
public static class ExcelHelper
{
//To get the value of the cell, even it's empty. Unable to use loop by index
private static string GetCellValue(WorkbookPart wbPart, List<Cell> theCells, string cellColumnReference)
{
Cell theCell = null;
string value = "";
foreach (Cell cell in theCells)
{
if (cell.CellReference.Value.StartsWith(cellColumnReference))
{
theCell = cell;
break;
}
}
if (theCell != null)
{
value = theCell.InnerText;
// If the cell represents an integer number, you are done.
// For dates, this code returns the serialized value that represents the date. The code handles strings and
// Booleans individually. For shared strings, the code looks up the corresponding value in the shared string table. For Booleans, the code converts the value into the words TRUE or FALSE.
if (theCell.DataType != null)
{
switch (theCell.DataType.Value)
{
case CellValues.SharedString:
// For shared strings, look up the value in the shared strings table.
var stringTable = wbPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
// If the shared string table is missing, something is wrong. Return the index that is in the cell. Otherwise, look up the correct text in the table.
if (stringTable != null)
{
value = stringTable.SharedStringTable.ElementAt(int.Parse(value)).InnerText;
}
break;
case CellValues.Boolean:
switch (value)
{
case "0":
value = "FALSE";
break;
default:
value = "TRUE";
break;
}
break;
}
}
}
return value;
}
private static string GetCellValue(WorkbookPart wbPart, List<Cell> theCells, int index)
{
return GetCellValue(wbPart, theCells, GetExcelColumnName(index));
}
private static string GetExcelColumnName(int columnNumber)
{
int dividend = columnNumber;
string columnName = String.Empty;
int modulo;
while (dividend > 0)
{
modulo = (dividend - 1) % 26;
columnName = Convert.ToChar(65 + modulo).ToString() + columnName;
dividend = (int)((dividend - modulo) / 26);
}
return columnName;
}
//Only xlsx files
public static DataTable GetDataTableFromExcelFile(string filePath, string sheetName = "")
{
DataTable dt = new DataTable();
try
{
using (SpreadsheetDocument document = SpreadsheetDocument.Open(filePath, false))
{
WorkbookPart wbPart = document.WorkbookPart;
IEnumerable<Sheet> sheets = document.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string sheetId = sheetName != "" ? sheets.Where(q => q.Name == sheetName).First().Id.Value : sheets.First().Id.Value;
WorksheetPart wsPart = (WorksheetPart)wbPart.GetPartById(sheetId);
SheetData sheetdata = wsPart.Worksheet.Elements<SheetData>().FirstOrDefault();
int totalHeaderCount = sheetdata.Descendants<Row>().ElementAt(0).Descendants<Cell>().Count();
//Get the header
for (int i = 1; i <= totalHeaderCount; i++)
{
dt.Columns.Add(GetCellValue(wbPart, sheetdata.Descendants<Row>().ElementAt(0).Elements<Cell>().ToList(), i));
}
foreach (Row r in sheetdata.Descendants<Row>())
{
if (r.RowIndex > 1)
{
DataRow tempRow = dt.NewRow();
//Always get from the header count, because the index of the row changes where empty cell is not counted
for (int i = 1; i <= totalHeaderCount; i++)
{
tempRow[i - 1] = GetCellValue(wbPart, r.Elements<Cell>().ToList(), i);
}
dt.Rows.Add(tempRow);
}
}
}
}
catch (Exception ex)
{
}
return dt;
}
}
First Add ExcelUtility.cs to your project :
ExcelUtility.cs
using System.Data;
using System.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
namespace Core_Excel.Utilities
{
static class ExcelUtility
{
public static DataTable Read(string path)
{
var dt = new DataTable();
using (var ssDoc = SpreadsheetDocument.Open(path, false))
{
var sheets = ssDoc.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
var relationshipId = sheets.First().Id.Value;
var worksheetPart = (WorksheetPart) ssDoc.WorkbookPart.GetPartById(relationshipId);
var workSheet = worksheetPart.Worksheet;
var sheetData = workSheet.GetFirstChild<SheetData>();
var rows = sheetData.Descendants<Row>().ToList();
foreach (var row in rows) //this will also include your header row...
{
var tempRow = dt.NewRow();
var colCount = row.Descendants<Cell>().Count();
foreach (var cell in row.Descendants<Cell>())
{
var index = GetIndex(cell.CellReference);
// Add Columns
for (var i = dt.Columns.Count; i <= index; i++)
dt.Columns.Add();
tempRow[index] = GetCellValue(ssDoc, cell);
}
dt.Rows.Add(tempRow);
}
}
return dt;
}
private static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
var stringTablePart = document.WorkbookPart.SharedStringTablePart;
var value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
return stringTablePart.SharedStringTable.ChildElements[int.Parse(value)].InnerText;
return value;
}
public static int GetIndex(string name)
{
if (string.IsNullOrWhiteSpace(name))
return -1;
int index = 0;
foreach (var ch in name)
{
if (char.IsLetter(ch))
{
int value = ch - 'A' + 1;
index = value + index * 26;
}
else
break;
}
return index - 1;
}
}
}
Usage :
var path = "D:\\Documents\\test.xlsx";
var dt = ExcelUtility.Read(path);
then enjoy it!
Public Shared Function ExcelToDataTable(filename As String) As DataTable
Try
Dim dt As New DataTable()
Using doc As SpreadsheetDocument = SpreadsheetDocument.Open(filename, False)
Dim workbookPart As WorkbookPart = doc.WorkbookPart
Dim sheets As IEnumerable(Of Sheet) = doc.WorkbookPart.Workbook.GetFirstChild(Of Sheets)().Elements(Of Sheet)()
Dim relationshipId As String = sheets.First().Id.Value
Dim worksheetPart As WorksheetPart = DirectCast(doc.WorkbookPart.GetPartById(relationshipId), WorksheetPart)
Dim workSheet As Worksheet = worksheetPart.Worksheet
Dim sheetData As SheetData = workSheet.GetFirstChild(Of SheetData)()
Dim rows As IEnumerable(Of Row) = sheetData.Descendants(Of Row)()
For Each cell As Cell In rows.ElementAt(0)
dt.Columns.Add(GetCellValue(doc, cell))
Next
For Each row As Row In rows
'this will also include your header row...
Dim tempRow As DataRow = dt.NewRow()
For i As Integer = 0 To row.Descendants(Of Cell)().Count() - 1
tempRow(i) = GetCellValue(doc, row.Descendants(Of Cell)().ElementAt(i))
Next
dt.Rows.Add(tempRow)
Next
End Using
dt.Rows.RemoveAt(0)
Return dt
Catch ex As Exception
Throw ex
End Try
End Function
Public Shared Function GetCellValue(document As SpreadsheetDocument, cell As Cell) As String
Try
If IsNothing(cell.CellValue) Then
Return ""
End If
Dim value As String = cell.CellValue.InnerXml
If cell.DataType IsNot Nothing AndAlso cell.DataType.Value = CellValues.SharedString Then
Dim stringTablePart As SharedStringTablePart = document.WorkbookPart.SharedStringTablePart
Return stringTablePart.SharedStringTable.ChildElements(Int32.Parse(value)).InnerText
Else
Return value
End If
Catch ex As Exception
Return ""
End Try
End Function
I know it is a long time ago since this thread started. However, none of the solutions above did not really work for me. Empty cells issue and others.
I found a very good solution with 'MIT' license on GitHub:
https://github.com/ExcelDataReader/ExcelDataReader
This worked for me for both C# and VBnet applications.
Sample call from VBNET (the sample code for c# is on GitHub) :
Using stream As FileStream = New FileStream(DataPath & "\" & fName.Name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)
Using reader As IExcelDataReader = ExcelReaderFactory.CreateReader(stream)
ds = reader.AsDataSet(New ExcelDataSetConfiguration() With {
.UseColumnDataType = False,
.ConfigureDataTable = Function(tableReader) New ExcelDataTableConfiguration() With {
.UseHeaderRow = True
}
})
End Using
End Using
The result was a dataset with one table for each sheet in the workbook.
An I really like to compile the dll made in C# by myself rather then using a ready dll. So I can control what I am delivering to customers.
if rows value is null or empty get values wrong work.
all columns filled with data if it is working true. but maybe all rows not