I am a new C# developer and I am struggling now with converting the columns in the following DataTable into rows. The current DataTable structure I have is:
Id Value
1 Test#1
1 Test#2
2 Car#1
2 Car#2
3 Airplane#1
3 Airplane#2
I need to convert it into the following table structure:
1 2 3
Test#1 Car#1 Airplane#1
Test#2 Car#2 Airplane#2
I tried to implement it using the following method I found in Google, but I am still not getting the desired result:
// Transposes inputTable so that its rows become columns and its columns become rows.
//
// Output layout:
//   * column 0 is named after inputTable's first column and holds, for each output row,
//     the name of one of the remaining input columns;
//   * one further column per input row, named after that row's first cell.
// All output cells are strings (values are copied with ToString()).
//
// inputTable: the table to transpose; it must contain at least one column.
// Returns: a new DataTable; inputTable itself is not modified.
private DataTable GenerateTransposedTable(DataTable inputTable)
{
    DataTable outputTable = new DataTable();

    // Header row's first column is the same as in inputTable.
    outputTable.Columns.Add(inputTable.Columns[0].ColumnName);

    // Header row's second column onwards: one column per input row.
    foreach (DataRow inRow in inputTable.Rows)
    {
        string baseName = inRow[0].ToString();
        string newColName = baseName;

        // Column names must be unique; a repeated first-cell value (e.g. Id 1, 1, 2, 2)
        // would otherwise make Columns.Add throw DuplicateNameException.
        int suffix = 1;
        while (outputTable.Columns.Contains(newColName))
        {
            suffix++;
            newColName = baseName + "_" + suffix;
        }

        outputTable.Columns.Add(newColName);
    }

    // Every input column except the first becomes one output row.
    for (int rCount = 1; rCount <= inputTable.Columns.Count - 1; rCount++)
    {
        DataRow newRow = outputTable.NewRow();

        // First cell: the name of the input column this row was built from.
        newRow[0] = inputTable.Columns[rCount].ColumnName;

        // Remaining cells: that column's value in each input row, in row order.
        for (int cCount = 0; cCount <= inputTable.Rows.Count - 1; cCount++)
        {
            newRow[cCount + 1] = inputTable.Rows[cCount][rCount].ToString();
        }

        outputTable.Rows.Add(newRow);
    }

    return outputTable;
}
Could you please tell me how I can transpose the first DataTable into the second one?
Using an extension method, you can pivot the original DataTable by creating the implied row number column that groups the answer rows together:
public static class DataTableExt {
    // Pivots a two-field DataTable into a new DataTable.
    //
    // OverColFieldName:   field whose distinct values (as strings) become the new column names,
    //                     ordered with the default string ordering.
    // WithValueFieldName: field whose values fill the new columns; its DataType is used for
    //                     every new column.
    //
    // The n-th source row having a given "over" value lands in output row n, so rows only
    // need to be in the wanted order within each "over" value. Groups of different sizes
    // are supported: cells with no source row are left as DBNull.
    //
    // Returns an empty DataTable (no columns) when dt has no rows.
    public static DataTable Transpose(this DataTable dt, string OverColFieldName, string WithValueFieldName) {
        var res = new DataTable();
        if (dt.Rows.Count > 0) {
            var valueDataType = dt.Columns[WithValueFieldName].DataType;

            // Read the key through the object indexer + ToString() so the key column may be
            // of any type, not only int.
            var colNames = dt.AsEnumerable()
                             .Select(r => r[OverColFieldName].ToString())
                             .Distinct()
                             .OrderBy(n => n);
            foreach (var n in colNames)
                res.Columns.Add(n, valueDataType);

            // Number the rows inside each key group (0, 1, 2, ...) and then regroup by that
            // number: every regrouped set is one output row.
            var rowGroups = dt.AsEnumerable()
                              .GroupBy(r => r[OverColFieldName].ToString())
                              .SelectMany(g => g.Select((r, i) => new { RowNum = i, Over = g.Key, With = r[WithValueFieldName] }))
                              .GroupBy(x => x.RowNum)
                              .OrderBy(g => g.Key);

            foreach (var rowGroup in rowGroups) {
                var newr = res.NewRow();
                foreach (var r in rowGroup)
                    newr[r.Over] = r.With;
                res.Rows.Add(newr);
            }
        }
        return res;
    }
}
And you can use it by passing in the Column Names for the source of the new columns and the values to be placed in each column:
// Pivot dt: "Id" supplies the new column names, "Value" supplies the cell contents.
var ans = dt.Transpose("Id", "Value");
I want to be able to remove DataRows in a DataTable that contain null values, i.e. rows whose arrays contain no numeric data.
Trying to modify the code below which transposes the DataTable for use by Google Visualisation API:
// Builds a transposed copy of inputTable (rows <-> columns) with string cells.
//
// The first output column keeps the name of the first input column and holds the names of
// the other input columns; each input row contributes one output column named after the
// row's first cell. Note that Columns.Add rejects a repeated column name, so the first
// cells of the input rows must be distinct from each other and from the first column name.
private DataTable TransposeOpiate(DataTable inputTable)
{
    DataColumnCollection sourceColumns = inputTable.Columns;
    DataRowCollection sourceRows = inputTable.Rows;
    DataTable transposed = new DataTable();

    // Header: first the original first-column name, then one column per source row.
    transposed.Columns.Add(sourceColumns[0].ColumnName.ToString());
    for (int r = 0; r < sourceRows.Count; r++)
    {
        transposed.Columns.Add(sourceRows[r][0].ToString());
    }

    // Body: source columns 1..n-1 each become a row.
    int sourceColumn = 1;
    while (sourceColumn < sourceColumns.Count)
    {
        DataRow transposedRow = transposed.NewRow();
        transposedRow[0] = sourceColumns[sourceColumn].ColumnName.ToString();

        int target = 1;
        foreach (DataRow sourceRow in sourceRows)
        {
            transposedRow[target] = sourceRow[sourceColumn].ToString();
            target++;
        }

        transposed.Rows.Add(transposedRow);
        sourceColumn++;
    }

    return transposed;
}
For example, in the IntelliSense output shown below, the row for "improved" has no data in the values after the label (these would normally be numeric values) and instead contains empty entries:
I need to be able to delete these rows if they are in the above format, how can I modify my code to perform this?
Why not set a condition which prevents adding the row if all the row cells have a null value?
// Only keep the row when at least one data cell has content.
// Cell 0 is the row label, so it is skipped. DataRow cells are never null: an unset cell
// is DBNull.Value, and the transpose code stores ToString() results, so an empty source
// value arrives here as "". A plain "!= null" test would therefore always pass.
if (newRow.ItemArray.Skip(1).Any(x => x != null && x != DBNull.Value && !string.IsNullOrWhiteSpace(x.ToString())))
{
    outputTable.Rows.Add(newRow);
}
If you need to check for columns as well, do this right before returning outputTable
// Remove every column whose cells are all empty. Iterate from the last column backwards
// so that RemoveAt does not shift the indexes still to be visited.
// A cell counts as empty when it is DBNull or an empty string; DataRow never returns null
// for a cell, so testing "!= null" alone would keep every column.
// With zero rows every column is (vacuously) empty and is removed.
for (int col = outputTable.Columns.Count - 1; col >= 0; col--)
{
    bool toDelete = true;
    for (int row = 0; row < outputTable.Rows.Count; row++)
    {
        object cell = outputTable.Rows[row][col];
        if (cell != null && cell != DBNull.Value && cell.ToString().Length > 0)
        {
            toDelete = false;
            break; // one non-empty cell is enough to keep the column
        }
    }
    if (toDelete)
    {
        outputTable.Columns.RemoveAt(col);
    }
}
You can try with something like this (comments are in code):
// Demonstration: builds a small sample table and selects the rows that contain at least
// one value accepted by IsNumeric. The selection is not returned or stored.
private void CheckColumns()
{
    // Sample table: one string column followed by two int columns.
    DataTable table = new DataTable();
    table.Columns.Add("string column", typeof(string));
    foreach (string intColumnName in new[] { "int column 1", "int column 2" })
    {
        table.Columns.Add(intColumnName, typeof(int));
    }

    // Sample data: "abc" and "ghi" carry at least one number, "def" and "jkl" carry none.
    object[][] sampleRows =
    {
        new object[] { "abc", 1, 2 },
        new object[] { "def", null, null },
        new object[] { "ghi", null, 2 },
        new object[] { "jkl", null, null }
    };
    foreach (object[] values in sampleRows)
    {
        table.Rows.Add(values);
    }

    // Keep the rows in which any cell passes the numeric check.
    var validRows = (from row in table.AsEnumerable()
                     where row.ItemArray.Any(cell => IsNumeric(cell))
                     select row).ToList();
}
//this is helper method that code will call for each value in each row
// Helper called for each value in each row.
// Returns true when the value's string form parses as an int (int.TryParse);
// returns false for null, DBNull and anything that does not parse.
private bool IsNumeric(object value)
{
    // Guard first: calling ToString() on a null reference would throw.
    if (value == null || value == DBNull.Value)
    {
        return false;
    }

    int outputValue;
    return int.TryParse(value.ToString(), out outputValue);
}
If you'll have decimal values, then you should use decimal.TryParse in the IsNumeric method.
If you need to be sure that all columns except first one have numeric values, you can do it like this (skipping first value from each row and checking if all other values are numeric)...
// Keep only the rows in which every value after the first cell passes IsNumeric.
var validRows = table.AsEnumerable().Where(r => r.ItemArray.Skip(1).All(c => IsNumeric(c))).ToList();
once you have those 'valid' rows, you can write that data into another table or do whatever you need to do with those data...
Instead of .ToList() you can use .CopyToDataTable() method so you'll get new DataTable with those 'valid' rows. For example:
// Rows in which every value after the first cell passes IsNumeric.
var numericRows = table.AsEnumerable().Where(r => r.ItemArray.Skip(1).All(c => IsNumeric(c))).ToList();
// CopyToDataTable throws InvalidOperationException when the source contains no rows,
// so fall back to an empty table with the same schema in that case.
DataTable newDataTable = numericRows.Count > 0 ? numericRows.CopyToDataTable() : table.Clone();
I am trying to scrape data from a web page, but I am having trouble scraping all of the data in the table. I need to switch pages to get all of the data, and I want the output as a DataGridTable. I cannot figure out how to do this in a way that still works when the number of pages on the website changes. I would like the information to be added to the data grid automatically, page by page. My input (the website) only shows 25 items per page; that is why I have only 25 items in the DataGridTable. I would like to determine the number of pages from the element of the "go to end page" button, so that my program knows how many pages there are to scrape from the website. If there is a different way, I would like to know. Thank you.
This is my code for now.
// Fragment of a larger method (its header is not part of this snippet): scrapes the grid
// inside #gridComponent into a DataTable, binds it to dataGridView1 and clicks a pager item.
DataTable dt = new DataTable();
// --- Column headers: one DataTable column per header cell that has no checkbox ---
var header = driver.FindElement(By.CssSelector("#gridComponent > div.k-grid-header"));
foreach (var row in header.FindElements(By.TagName("tr")))
{
// Per-header-row counter and scratch array for the header texts.
int cellIndex = 0;
string[] arr = new string[32];
// Walk the header cells.
foreach (var cell in row.FindElements(By.TagName("th")))
{
// Check the header cell for a checkbox child. If no
// such child exists, add the column.
var headerCheckboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
if (headerCheckboxes.Count == 0)
{
// NOTE(review): cellIndex is only incremented in the else branch below, which needs
// cellIndex > 29. It starts at 0, so it never changes: arr[0] is overwritten for each
// header and every non-checkbox header is added as a column. Presumably the increment
// was meant to run for every header - confirm intent.
if (cellIndex <= 29)
{
arr[cellIndex] = cell.Text;
dt.Columns.Add(cell.Text);
}
else
cellIndex++;
}
}
// NOTE(review): this passes the string[] itself to Console.WriteLine, not its elements.
Console.WriteLine(arr);
}
// --- Data rows: every <tr> under #gridComponent, first <td> skipped ---
var table = driver.FindElement(By.CssSelector("#gridComponent"));
foreach (var row in table.FindElements(By.TagName("tr")))
{
// Position of the current <td> within the row.
int cellIndex = 0;
// Use a list instead of an array
List<string> arr = new List<string>();
// Walk the data cells; a <tr> without any <td> leaves arr empty.
foreach (var cell in row.FindElements(By.TagName("td")))
{
// Skip the first column in the row by checking
// if the cell index is 0.
if (cellIndex != 0)
{
string cellValue = "";
// Debug output of the element object itself (not its text).
Console.WriteLine(cell);
// A cell containing a checkbox is stored as the checkbox state ("True"/"False");
// any other cell is stored as its visible text.
var checkboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
if (checkboxes.Count > 0)
{
bool isChecked = false;
isChecked = checkboxes[0].Selected;
cellValue = isChecked.ToString();
}
else
{
cellValue = cell.Text;
}
arr.Add(cellValue);
}
cellIndex++;
}
// NOTE(review): rows with no <td> cells still reach this call with an empty array.
dt.Rows.Add(arr.ToArray());
}
dataGridView1.DataSource = dt;
// Click the third item of the pager list; only the page currently shown has been read.
driver.FindElement(By.CssSelector("#gridComponent > div.k-pager-wrap.k-grid-pager.k-widget.k-floatwrap > ul > li:nth-child(3)")).Click();
}
This is the table that I am trying to scrape from.
This is the code for the following element that is shown picture above.
<span class="k-icon k-i-arrow-end-right"></span>
Thank you so much.
You may want to consider the index information "1 - 25 out of 64 items", since it is a good indicator of the total number of pages.
Batch = 1 - 25 i.e. 25 items per page
Total items = 64
No. of pages = roundup (64 / 25)
PS: A better option, without any computations, maybe to get the "data-page" attribute of the last page button.
I finally got the answer for this.
// Reads the data rows of a grid element into a list of string lists.
//
// table: element whose descendant <tr> rows are read.
// Returns: one inner list per row that contains <td> cells. The first <td> of each row is
//          skipped; a cell containing a checkbox yields the checkbox state ("True"/"False"),
//          any other cell yields its visible text.
private List<List<string>> GetRecords(IWebElement table)
{
    List<List<string>> rows = new List<List<string>>();

    foreach (var row in table.FindElements(By.TagName("tr")))
    {
        var cells = row.FindElements(By.TagName("td"));

        // Rows without <td> cells (e.g. header rows made of <th>) carry no data. Skipping
        // them avoids empty records that would show up as blank rows in the grid.
        if (cells.Count == 0)
        {
            continue;
        }

        List<string> cols = new List<string>();

        // Start at 1: the first column of every row is skipped.
        for (int cellIndex = 1; cellIndex < cells.Count; cellIndex++)
        {
            var cell = cells[cellIndex];
            var checkboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
            string cellValue = checkboxes.Count > 0
                ? checkboxes[0].Selected.ToString()
                : cell.Text;
            cols.Add(cellValue);
        }

        rows.Add(cols);
    }

    return rows;
}
// Click handler: starts Chrome, logs in, opens the customer list, scrapes the grid page by
// page through GetRecords and binds the collected rows to dataGridView1.
// NOTE(review): the driver is not quit or disposed anywhere in this method.
private void button1_Click(object sender, EventArgs e)
{
// Driver service configured to hide the chromedriver console window.
var chromeDriverService = ChromeDriverService.CreateDefaultService();
chromeDriverService.HideCommandPromptWindow = true;
// Chrome options configured for headless mode.
ChromeOptions option = new ChromeOptions();
option.AddArgument("--headless");
// The service and options above are only used by the commented-out constructor below;
// the active line starts Chrome with default settings. Swap the two lines to run hidden.
//IWebDriver driver = new ChromeDriver(chromeDriverService, option);
IWebDriver driver = new ChromeDriver();
driver.Url = "**************";
driver.Manage().Window.Maximize();
driver.SwitchTo().DefaultContent();
// Log in: user name, then password followed by Enter.
driver.FindElement(By.Id("username")).SendKeys("*****");
driver.FindElement(By.Id("password")).SendKeys("******" + OpenQA.Selenium.Keys.Enter);
// Enter the access code, submitted with Enter.
driver.FindElement(By.Id("password")).SendKeys("*******");
driver.FindElement(By.Id("accesscode")).SendKeys("********" + OpenQA.Selenium.Keys.Enter);
// Navigate to the customer list.
driver.Navigate().GoToUrl("***********");
driver.Navigate().GoToUrl("*****************");
// Wait for the page to load. waitOnPage is defined elsewhere; it is called with 2.
waitOnPage(2);
DataTable dt = new DataTable();
// --- Column headers: one DataTable column per header cell that has no checkbox ---
var header = driver.FindElement(By.CssSelector("#gridComponent > div.k-grid-header"));
foreach (var row in header.FindElements(By.TagName("tr")))
{
// Per-header-row counter and scratch array for the header texts.
int cellIndex = 0;
string[] arr = new string[32];
// Walk the header cells.
foreach (var cell in row.FindElements(By.TagName("th")))
{
// Check the header cell for a checkbox child. If no
// such child exists, add the column.
var headerCheckboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
if (headerCheckboxes.Count == 0)
{
// NOTE(review): cellIndex is only incremented in the else branch, which needs
// cellIndex > 29; it starts at 0 and therefore never changes. Every non-checkbox
// header is added as a column and arr[0] is overwritten each time - confirm intent.
if (cellIndex <= 29)
{
arr[cellIndex] = cell.Text;
dt.Columns.Add(cell.Text);
}
else
cellIndex++;
}
}
// NOTE(review): this passes the string[] itself to Console.WriteLine, not its elements.
Console.WriteLine(arr);
}
// --- Rows of the page shown first ---
var table = driver.FindElement(By.CssSelector("#gridComponent"));
List<List<string>> records = GetRecords(table);
// The "last page" pager button carries the number of the last page in its data-page attribute.
var lastPageStr = table.FindElement(By.ClassName("k-pager-last")).GetAttribute("data-page");
var lastPage = Convert.ToInt16(lastPageStr);
// Other pager elements can be selected in the same way, e.g.
// class="k-link k-pager-nav" data-page="1"
// NOTE(review): this clicks the third pager list item before the loop starts; no rows are
// read between this click and the first click inside the loop - confirm no page is skipped.
driver.FindElement(By.CssSelector("#gridComponent > div.k-pager-wrap.k-grid-pager.k-widget.k-floatwrap > ul > li:nth-child(3)")).Click();
// Cycle over the remaining pages: click the pager link, wait, then collect that page's rows.
for (int p = 0; p < (lastPage - 1); p++)
{
driver.FindElement(By.CssSelector("#gridComponent > div.k-pager-wrap.k-grid-pager.k-widget.k-floatwrap > a:nth-child(4) > span")).Click();
waitOnPage(2);
// The same table element found before paging is reused for every page.
var rows = GetRecords(table);
records.AddRange(rows);
}
// Add all collected rows to the DataTable.
//dt.Rows.Add(records[4].ToArray());
foreach(var row in records)
{
dt.Rows.Add(row.ToArray());
}
dataGridView1.DataSource = dt;
}
I need to replace integers values inside a datatable with a string symbol (*) if they are greater than 0 and less than 5.
So far I can loop through each row and the corresponding columns but I am unable to get the individual values contained in the datatable.
The code I have written so far is shown below:
// Unfinished stub: visits every numeric column of every row in cases but performs no
// replacement yet, and always returns a new, empty DataTable.
public static DataTable SupressDataTable(DataTable cases)
{
    DataTable emptyResult = new DataTable();
    DataColumnCollection columns = cases.Columns;

    for (int rowIndex = 0; rowIndex < cases.Rows.Count; rowIndex++)
    {
        for (int columnIndex = 0; columnIndex < columns.Count; columnIndex++)
        {
            if (!columns[columnIndex].IsNumeric())
            {
                continue;
            }
            // The value replacement for numeric columns is still to be written.
        }
    }

    return emptyResult;
}
// Tells whether a column stores one of the built-in numeric CLR types
// (integer, floating-point or decimal). A null column is reported as not numeric.
public static bool IsNumeric(this DataColumn col)
{
    if (col != null)
    {
        // The built-in numeric types accepted as "numeric".
        Type[] numericTypes =
        {
            typeof(Byte), typeof(Decimal), typeof(Double),
            typeof(Int16), typeof(Int32), typeof(Int64), typeof(SByte),
            typeof(Single), typeof(UInt16), typeof(UInt32), typeof(UInt64)
        };
        return Array.IndexOf(numericTypes, col.DataType) >= 0;
    }

    return false;
}
How do I get to the values and then replace them?
You can't do that with the original table because an int or double column is not a string column. You need a new table where each numeric column is replaced with a string column:
// Returns a copy of cases in which every numeric column is replaced by a string column
// at the same position. Values greater than 0 and less than 5 become "*"; all other
// values become their ToString() text. Empty (DBNull) cells stay empty.
// cases itself is not modified.
public static DataTable SupressDataTable(DataTable cases)
{
    DataTable suppressedDataTable = cases.Copy(); // Same columns, same data

    // A numeric column cannot hold "*", so swap each one for a string column of the same
    // name at the same ordinal. The list is materialised before the collection is changed.
    DataColumn[] allNumericColumns = suppressedDataTable.Columns.Cast<DataColumn>().Where(IsNumeric).ToArray();
    foreach (DataColumn numericCol in allNumericColumns)
    {
        int ordinal = numericCol.Ordinal; // need to store before remove
        suppressedDataTable.Columns.Remove(numericCol);
        suppressedDataTable.Columns.Add(numericCol.ColumnName); // string column
        suppressedDataTable.Columns[numericCol.ColumnName].SetOrdinal(ordinal);
    }

    // Removing the columns discarded their data, so refill them from the original table.
    for (int index = 0; index < suppressedDataTable.Rows.Count; index++)
    {
        DataRow row = suppressedDataTable.Rows[index];
        foreach (DataColumn column in cases.Columns)
        {
            if (!IsNumeric(column))
            {
                continue;
            }

            object rawValue = cases.Rows[index][column];

            // An empty cell holds DBNull, which cannot be compared with a number;
            // leave the new string cell empty as well.
            if (rawValue == null || rawValue == DBNull.Value)
            {
                continue;
            }

            // Every type accepted by IsNumeric converts to double, which is precise
            // enough for the "between 0 and 5" test.
            double numVal = Convert.ToDouble(rawValue);
            string newValue = numVal > 0 && numVal < 5 ? "*" : rawValue.ToString();
            row.SetField(column.Ordinal, newValue);
        }
    }

    return suppressedDataTable;
}
I'm using the following code to convert an Excel to a datatable using EPPlus:
// Loads the first worksheet of the Excel file at path into a DataTable of strings.
// Row 1 supplies the column names; every following row becomes a DataRow whose cells
// are the cells' displayed text. An empty worksheet yields an empty DataTable.
public DataTable ExcelToDataTable(string path)
{
    DataTable tbl = new DataTable();
    bool hasHeader = true;

    // "using" disposes the package even when an exception is thrown.
    using (var pck = new OfficeOpenXml.ExcelPackage())
    {
        // Close the file stream as soon as the package has been loaded from it.
        // Leaving the stream from File.OpenRead open keeps the file locked.
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }

        var ws = pck.Workbook.Worksheets.First();

        // Dimension is null when the worksheet has no cells at all.
        if (ws.Dimension == null)
        {
            return tbl;
        }

        foreach (var firstRowCell in ws.Cells[1, 1, 1, ws.Dimension.End.Column])
        {
            tbl.Columns.Add(hasHeader ? firstRowCell.Text : string.Format("Column {0}", firstRowCell.Start.Column));
        }

        var startRow = hasHeader ? 2 : 1;
        for (var rowNum = startRow; rowNum <= ws.Dimension.End.Row; rowNum++)
        {
            var wsRow = ws.Cells[rowNum, 1, rowNum, ws.Dimension.End.Column];
            var row = tbl.NewRow();
            foreach (var cell in wsRow)
            {
                // Worksheet columns are 1-based, DataRow indexes are 0-based.
                row[cell.Start.Column - 1] = cell.Text;
            }
            tbl.Rows.Add(row);
        }
    }

    return tbl;
}
It creates the Excel, however, when I try to open it, it gives me the message that it is locked for editing by another user and that I can only open it in Read-Only mode.
I thought using:
pck.Dispose();
would solve the issue, however I'm still getting the same error.
Also, when I try to delete the file, I get the message: The action can't be completed because the file is open in WebDev.WebServer40.EXE.
Any ideas how to resolve this?
Thanks in advance. :)
I see, that's what I posted recently here (now corrected). It can be improved, since the ExcelPackage and the FileStream (from File.OpenRead) are not disposed after use.
// Loads the first worksheet of the Excel file at path into a DataTable of strings.
//
// path:      location of the workbook.
// hasHeader: true  - row 1 supplies the column names and data starts at row 2;
//            false - columns are named "Column N" and data starts at row 1.
// Returns: the filled DataTable; an empty DataTable when the worksheet has no cells.
public static DataTable GetDataTableFromExcel(string path, bool hasHeader = true)
{
    using (var pck = new OfficeOpenXml.ExcelPackage())
    {
        // The stream is only needed while loading; closing it right away releases the file.
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }

        var ws = pck.Workbook.Worksheets.First();
        DataTable tbl = new DataTable();

        // Dimension is null for a completely empty worksheet; there is nothing to read then.
        if (ws.Dimension == null)
        {
            return tbl;
        }

        foreach (var firstRowCell in ws.Cells[1, 1, 1, ws.Dimension.End.Column])
        {
            tbl.Columns.Add(hasHeader ? firstRowCell.Text : string.Format("Column {0}", firstRowCell.Start.Column));
        }

        var startRow = hasHeader ? 2 : 1;
        for (int rowNum = startRow; rowNum <= ws.Dimension.End.Row; rowNum++)
        {
            var wsRow = ws.Cells[rowNum, 1, rowNum, ws.Dimension.End.Column];
            DataRow row = tbl.Rows.Add();
            foreach (var cell in wsRow)
            {
                // Worksheet columns are 1-based, DataRow indexes are 0-based.
                row[cell.Start.Column - 1] = cell.Text;
            }
        }

        return tbl;
    }
}
An extension-method version of Tim Schmelter's answer.
// Converts a worksheet into a DataTable of strings.
//
// ws:           worksheet to read; columns 1..Dimension.End.Column are used.
// hasHeaderRow: true  - row 1 supplies the column names and data starts at row 2;
//               false - columns are named "Column N" and data starts at row 1.
// Returns: the filled DataTable; an empty DataTable when the worksheet has no cells.
public static DataTable ToDataTable(this ExcelWorksheet ws, bool hasHeaderRow = true)
{
    var tbl = new DataTable();

    // Dimension is null for a completely empty worksheet.
    if (ws.Dimension == null)
    {
        return tbl;
    }

    int lastColumn = ws.Dimension.End.Column;

    // Create exactly one column per worksheet column by index, so that a blank header
    // cell cannot leave the table with fewer columns than the data rows address.
    for (int col = 1; col <= lastColumn; col++)
    {
        string headerText = hasHeaderRow ? ws.Cells[1, col].Text : null;
        string columnName = string.IsNullOrEmpty(headerText)
            ? string.Format("Column {0}", col)
            : headerText;

        // Column names must be unique within a DataTable.
        while (tbl.Columns.Contains(columnName))
        {
            columnName = columnName + "_" + col;
        }

        tbl.Columns.Add(columnName);
    }

    var startRow = hasHeaderRow ? 2 : 1;
    for (var rowNum = startRow; rowNum <= ws.Dimension.End.Row; rowNum++)
    {
        var wsRow = ws.Cells[rowNum, 1, rowNum, lastColumn];
        var row = tbl.NewRow();
        foreach (var cell in wsRow)
        {
            // Worksheet columns are 1-based, DataRow indexes are 0-based.
            row[cell.Start.Column - 1] = cell.Text;
        }
        tbl.Rows.Add(row);
    }

    return tbl;
}
I've created a method that converts an Excel file to a DataTable using EPPlus and tries to maintain type safety. Duplicate column names are also handled, and with a boolean you can tell the method whether the sheet has a header row. I created it for a complex import process that has several steps after uploading which require user input before committing to the database.
// Converts an uploaded Excel document into a typed DataTable.
//
// excelDocumentAsBytes: the workbook content.
// hasHeaderRow:         true when the first used row holds the column names; the column type
//                       is then guessed from the row below it, otherwise from the first row.
// Returns: the filled DataTable; an empty DataTable when the worksheet has no cells.
// Side effect: cells whose value does not fit the guessed column type are listed in
//              Label1.Text (HTML, one entry per line). Label1 is not touched for an
//              empty worksheet.
private DataTable ExcelToDataTable(byte[] excelDocumentAsBytes, bool hasHeaderRow)
{
    DataTable dt = new DataTable();
    string errorMessages = "";

    // Open the package from an in-memory stream.
    using (MemoryStream stream = new MemoryStream(excelDocumentAsBytes))
    using (ExcelPackage excelPackage = new ExcelPackage(stream))
    {
        // NOTE(review): index 1 is kept from the original; whether it addresses the first
        // sheet depends on the index base of the EPPlus version in use - confirm.
        ExcelWorksheet worksheet = excelPackage.Workbook.Worksheets[1];

        // Dimension is null when the worksheet is completely empty.
        if (worksheet.Dimension == null)
        {
            return dt;
        }

        // The used range does not have to start at A1, so everything below is addressed
        // relative to its first row and first column.
        int firstRow = worksheet.Dimension.Start.Row;
        int lastRow = worksheet.Dimension.End.Row;
        int firstColumn = worksheet.Dimension.Start.Column;
        int lastColumn = worksheet.Dimension.End.Column;

        // Add one DataTable column per worksheet column of the used range.
        for (int j = firstColumn; j <= lastColumn; j++)
        {
            string columnName = "Column " + j;
            Type columnType = typeof(String);

            var excelCell = worksheet.Cells[firstRow, j].Value;
            if (excelCell != null)
            {
                var excelCellDataType = excelCell;

                // With a header row the name comes from the first row and the type sample
                // from the row below it.
                if (hasHeaderRow == true)
                {
                    excelCellDataType = worksheet.Cells[firstRow + 1, j].Value;
                    columnName = excelCell.ToString();

                    // Make the name unique when it already exists in the table.
                    if (dt.Columns.Contains(columnName) == true)
                    {
                        columnName = columnName + "_" + j;
                    }
                }

                // Guess the column type from the sample value.
                if (excelCellDataType is DateTime)
                {
                    columnType = typeof(DateTime);
                }
                else if (excelCellDataType is Boolean)
                {
                    columnType = typeof(Boolean);
                }
                else if (excelCellDataType is Double)
                {
                    // Excel numbers always arrive as double; a decimal separator in the
                    // text form is taken to mean "decimal", otherwise "integer".
                    if (excelCellDataType.ToString().Contains(".") || excelCellDataType.ToString().Contains(","))
                    {
                        columnType = typeof(Decimal);
                    }
                    else
                    {
                        columnType = typeof(Int64);
                    }
                }
            }

            dt.Columns.Add(columnName, columnType);
        }

        // Copy the data rows (everything after the header row, if there is one).
        for (int i = firstRow + Convert.ToInt32(hasHeaderRow); i <= lastRow; i++)
        {
            DataRow row = dt.NewRow();

            for (int j = firstColumn; j <= lastColumn; j++)
            {
                var excelCell = worksheet.Cells[i, j].Value;
                if (excelCell == null)
                {
                    continue; // empty cells stay DBNull
                }

                // Index of this worksheet column inside the DataTable.
                int columnIndex = j - firstColumn;
                try
                {
                    row[columnIndex] = excelCell;
                }
                catch
                {
                    // The value does not fit the guessed column type: report it and go on.
                    // The cell text comes from the uploaded file, so it is HTML-encoded
                    // before being placed into the label markup.
                    errorMessages += "Row " + (i - 1) + ", Column " + j + ". Invalid " + dt.Columns[columnIndex].DataType.ToString().Replace("System.", "") + " value: " + System.Net.WebUtility.HtmlEncode(excelCell.ToString()) + "<br>";
                }
            }

            dt.Rows.Add(row);
        }
    }

    // Show the collected error messages, if any.
    Label1.Text = errorMessages;
    return dt;
}
The webforms button click for demo purposes.
// Upload button handler: converts the uploaded workbook to a DataTable and shows it in
// GridView1. Does nothing when no file was uploaded.
protected void Button1_Click(object sender, EventArgs e)
{
    if (!FileUpload1.HasFile)
    {
        return;
    }

    // CheckBox1 tells the converter whether the sheet has a header row.
    GridView1.DataSource = ExcelToDataTable(FileUpload1.FileBytes, CheckBox1.Checked);
    GridView1.DataBind();
}
VDWWD's answer above works great to keep type safety, and I built upon it with some improvements.
Method reads from a file directly.
Column type detection uses all rows and not just one value. The column type is set to String if more than one type is found in the column.
Error Messages returned in a list of strings.
Here is the updated version:
// Reads a worksheet of the Excel file at path into a typed DataTable.
//
// path:         location of the workbook.
// errorList:    replaced by a new list; receives one message per cell whose value could not
//               be stored in its column.
// hasHeaderRow: true when the first used row holds the column names.
//
// Column types are derived from all data cells of a column: DateTime, Boolean or Double
// when that is the only type found, otherwise String. Double columns become Int64 when
// every value is a whole number that fits, otherwise Decimal. A column whose first used
// row cell is empty is always String.
// Returns: the filled DataTable; an empty DataTable when the worksheet has no cells.
public static DataTable ExcelToDataTable(string path, ref List<string> errorList, bool hasHeaderRow = true )
{
    DataTable dt = new DataTable();
    errorList = new List<string>();

    using (ExcelPackage excelPackage = new ExcelPackage())
    {
        // The stream is only needed while loading; closing it right away releases the file.
        using (var stream = File.OpenRead(path))
        {
            excelPackage.Load(stream);
        }

        // NOTE(review): index 1 is kept from the original; whether it addresses the first
        // sheet depends on the index base of the EPPlus version in use - confirm.
        ExcelWorksheet worksheet = excelPackage.Workbook.Worksheets[1];

        // Dimension is null when the worksheet is completely empty.
        if (worksheet.Dimension == null)
        {
            return dt;
        }

        // The used range does not have to start at A1, so address everything relative to it.
        int firstRow = worksheet.Dimension.Start.Row;
        int lastRow = worksheet.Dimension.End.Row;
        int firstColumn = worksheet.Dimension.Start.Column;
        int lastColumn = worksheet.Dimension.End.Column;
        int firstDataRow = firstRow + Convert.ToInt32(hasHeaderRow);

        // Add one DataTable column per worksheet column of the used range.
        for (int j = firstColumn; j <= lastColumn; j++)
        {
            string columnName = "Column " + j;
            Type columnType = typeof(String);

            // Collect the types of all non-empty data cells in this column, and note
            // whether every number found is a whole number that fits into Int64.
            var columnTypes = new HashSet<Type>();
            bool allWholeNumbers = true;
            for (int i = firstDataRow; i <= lastRow; i++)
            {
                object value = worksheet.Cells[i, j].Value;
                if (value == null)
                {
                    continue;
                }

                columnTypes.Add(value.GetType());

                if (value is double)
                {
                    double number = (double)value;
                    if (Math.Floor(number) != number || Math.Abs(number) >= 9.2e18)
                    {
                        allWholeNumbers = false;
                    }
                }
            }

            var excelCell = worksheet.Cells[firstRow, j].Value;
            if (excelCell != null)
            {
                if (hasHeaderRow == true)
                {
                    columnName = excelCell.ToString();

                    // Make the name unique when it already exists in the table.
                    if (dt.Columns.Contains(columnName) == true)
                    {
                        columnName = columnName + "_" + j;
                    }
                }

                // A single type found in the column is used; mixed or no types fall back to string.
                Type excelCellDataType = columnTypes.Count == 1 ? columnTypes.First() : typeof(string);

                if (excelCellDataType == typeof(DateTime))
                {
                    columnType = typeof(DateTime);
                }
                else if (excelCellDataType == typeof(Boolean))
                {
                    columnType = typeof(Boolean);
                }
                else if (excelCellDataType == typeof(Double))
                {
                    // Excel always delivers numbers as double. Decide between integer and
                    // decimal from the values themselves; the type's name ("System.Double")
                    // always contains a '.', so it cannot be used for this.
                    columnType = allWholeNumbers ? typeof(Int64) : typeof(Decimal);
                }
            }

            dt.Columns.Add(columnName, columnType);
        }

        // Copy the data rows.
        for (int i = firstDataRow; i <= lastRow; i++)
        {
            DataRow row = dt.NewRow();

            for (int j = firstColumn; j <= lastColumn; j++)
            {
                var excelCell = worksheet.Cells[i, j].Value;
                if (excelCell == null)
                {
                    continue; // empty cells stay DBNull
                }

                // Index of this worksheet column inside the DataTable.
                int columnIndex = j - firstColumn;
                try
                {
                    row[columnIndex] = excelCell;
                }
                catch
                {
                    // The value does not fit the column type: report it and go on.
                    errorList.Add("Row " + (i - 1) + ", Column " + j + ". Invalid " + dt.Columns[columnIndex].DataType.ToString().Replace("System.", "") + " value: " + excelCell.ToString() );
                }
            }

            dt.Rows.Add(row);
        }
    }

    return dt;
}
This is an improvement on the generic one above. Use it if you have a class with, for example, the properties "Name", "Surname", "Telephone" and "Fax", and an Excel sheet whose first row contains the same names: it will load each Excel row into an object of that class and add it to a List.
// Maps rows of the first worksheet of an Excel file onto new instances of T.
//
// Row 1 is read as the header row: a property of T receives the value of the column whose
// header text equals the property name. Properties without a matching header, and cells
// without a value, are left at their default.
//
// path:       location of the workbook.
// fromRow:    first worksheet row converted into an object (pass 2 to skip the header row).
// fromColumn: only validated against toColumn; cells are addressed by their header's column.
// toRow:      last row to convert; 0 means the last used row of the sheet.
// toColumn:   last header column to read; 0 means "number of properties of T".
// Throws ArgumentException when toRow/toColumn lie before fromRow/fromColumn.
public static List<T> GetClassFromExcel<T>(string path, int fromRow, int fromColumn, int toRow = 0, int toColumn = 0)
{
    if (toColumn != 0 && toColumn < fromColumn) throw new ArgumentException("toColumn can not be less than fromColumn");
    if (toRow != 0 && toRow < fromRow) throw new ArgumentException("toRow can not be less than fromRow");

    List<T> retList = new List<T>();
    using (var pck = new ExcelPackage())
    {
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }

        // Retrieve the first worksheet.
        var ws = pck.Workbook.Worksheets.First();

        // All public properties of T; they are matched against the header texts.
        PropertyInfo[] myProp = typeof(T).GetProperties();

        // Without an explicit toColumn, read as many header columns as T has properties.
        toColumn = toColumn == 0 ? myProp.Length : toColumn;

        // Header text -> 1-based worksheet column. The cell's own column is used instead
        // of a running counter, because enumerating a range can skip empty cells.
        Dictionary<string, int> columnNames = new Dictionary<string, int>();
        foreach (var cell in ws.Cells[1, 1, 1, toColumn])
        {
            if (cell.Value == null)
            {
                continue;
            }

            string headerText = cell.Value.ToString();
            if (!columnNames.ContainsKey(headerText))
            {
                columnNames.Add(headerText, cell.Start.Column);
            }
        }

        // Dimension is null for an empty sheet; in that case there are no rows to read
        // unless the caller asked for an explicit toRow.
        int lastRow = toRow != 0 ? toRow : (ws.Dimension == null ? 0 : ws.Dimension.End.Row);

        for (var rowNum = fromRow; rowNum <= lastRow; rowNum++)
        {
            // A fresh instance per row; reusing one instance would make every list entry
            // refer to the same object holding the values of the last row.
            T objT = Activator.CreateInstance<T>();

            foreach (var propertyInfo in myProp)
            {
                int column;
                if (!columnNames.TryGetValue(propertyInfo.Name, out column))
                {
                    continue;
                }

                object cellValue = ws.Cells[rowNum, column].Value;
                if (cellValue == null)
                {
                    continue; // empty cell: keep the property's default value
                }

                // Convert.ChangeType cannot target Nullable<T> directly; convert to the
                // underlying type instead.
                Type targetType = Nullable.GetUnderlyingType(propertyInfo.PropertyType) ?? propertyInfo.PropertyType;
                propertyInfo.SetValue(objT, Convert.ChangeType(cellValue, targetType));
            }

            retList.Add(objT);
        }
    }

    return retList;
}
now you can use the list as a databinding source if you need...
A give from me to you... :) Daniel C. Vrey
Updated it for toColumn to work and added toRow and followed Andreas suggestions. Thumbs up for Andreas
// Reads rows of the first worksheet of an Excel file into new instances of T.
// For each row from fromRow to the last used row, the i-th property of T (in reflection
// order) is set to the displayed text of the cell in column fromColumn + i.
// toColumn bounds the range created for each row; 0 means "number of properties of T".
public static List<T> getClassFromExcel<T>(string path, int fromRow, int fromColumn, int toColumn = 0) where T : class
{
    using (var package = new OfficeOpenXml.ExcelPackage())
    {
        var results = new List<T>();

        using (var input = File.OpenRead(path))
        {
            package.Load(input);
        }

        var sheet = package.Workbook.Worksheets.First();

        // The property list does not change between rows, so it is read once.
        PropertyInfo[] properties = typeof(T).GetProperties();
        if (toColumn == 0)
        {
            toColumn = properties.Count();
        }

        int rowNum = fromRow;
        while (rowNum <= sheet.Dimension.End.Row)
        {
            T item = Activator.CreateInstance<T>();
            var rowRange = sheet.Cells[rowNum, fromColumn, rowNum, toColumn];

            // Properties are paired with cells purely by position.
            int offset = 0;
            foreach (PropertyInfo property in properties)
            {
                property.SetValue(item, rowRange[rowNum, fromColumn + offset].Text);
                offset++;
            }

            results.Add(item);
            rowNum++;
        }

        return results;
    }
}
// Maps rows of the first worksheet of an Excel file onto new instances of T.
//
// Row 1 is read as the header row. A property of T receives the value of the column whose
// header text equals the property's DisplayName attribute or, when it has none, the
// property name. Properties without a matching header, and cells without a value, are
// left at their default.
//
// path:       location of the workbook.
// fromRow:    row of the column headers; data is read starting at fromRow + 1.
// fromColumn: only validated against toColumn; cells are addressed by their header's column.
// toRow:      last row to convert; 0 means the last used row of the sheet.
// toColumn:   last header column to read; 0 means "number of properties of T".
// Throws ArgumentException when toRow/toColumn lie before fromRow/fromColumn.
public static List<T> GetClassFromExcel<T>(string path, int fromRow, int fromColumn, int toRow = 0, int toColumn = 0) where T: class, new()
{
    if (toColumn != 0 && toColumn < fromColumn) throw new ArgumentException("toColumn can not be less than fromColumn");
    if (toRow != 0 && toRow < fromRow) throw new ArgumentException("toRow can not be less than fromRow");

    List<T> retList = new List<T>();
    using (var pck = new ExcelPackage())
    {
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }

        // Retrieve the first worksheet.
        var ws = pck.Workbook.Worksheets.First();

        // All public properties of T; they are matched against the header texts.
        PropertyInfo[] myProp = typeof(T).GetProperties();

        // Without an explicit toColumn, read as many header columns as T has properties.
        toColumn = toColumn == 0 ? myProp.Length : toColumn;

        // Header text -> 1-based worksheet column. The cell's own column is used instead
        // of a running counter, because enumerating a range can skip empty cells.
        Dictionary<string, int> columnNames = new Dictionary<string, int>();
        foreach (var cell in ws.Cells[1, 1, 1, toColumn])
        {
            if (cell.Value == null)
            {
                continue;
            }

            string headerText = cell.Value.ToString();
            if (!columnNames.ContainsKey(headerText))
            {
                columnNames.Add(headerText, cell.Start.Column);
            }
        }

        // Dimension is null for an empty sheet; in that case there are no rows to read
        // unless the caller asked for an explicit toRow.
        int lastRow = toRow != 0 ? toRow : (ws.Dimension == null ? 0 : ws.Dimension.End.Row);

        // fromRow + 1: start with the row after the column headers.
        for (var rowNum = fromRow + 1; rowNum <= lastRow; rowNum++)
        {
            // A fresh instance per row.
            T objT = new T();

            foreach (var propertyInfo in myProp)
            {
                // Prefer the DisplayName annotation so that headers containing spaces can
                // be mapped; fall back to the property name itself.
                var attribute = propertyInfo.GetCustomAttributes(typeof(DisplayNameAttribute), true).Cast<DisplayNameAttribute>().SingleOrDefault();
                string displayName = attribute != null && !string.IsNullOrEmpty(attribute.DisplayName) ? attribute.DisplayName : propertyInfo.Name;

                int column;
                if (!columnNames.TryGetValue(displayName, out column))
                {
                    continue;
                }

                object cellValue = ws.Cells[rowNum, column].Value;
                if (cellValue == null)
                {
                    continue; // empty cell: keep the property's default value
                }

                // Convert.ChangeType cannot target Nullable<T> directly; convert to the
                // underlying type instead.
                Type targetType = Nullable.GetUnderlyingType(propertyInfo.PropertyType) ?? propertyInfo.PropertyType;
                propertyInfo.SetValue(objT, Convert.ChangeType(cellValue, targetType));
            }

            retList.Add(objT);
        }
    }

    return retList;
}
// Usage: the method was placed in a separate Helpers.cs file and is consumed like this.
// The arguments are the file location, fromRow = 1 and fromColumn = 1.
List<CustomerExcelModel> records =
Helpers.GetClassFromExcel<CustomerExcelModel>(filelocation, 1, 1);
Thanks a lot to the user who Submitted code and Andreas for suggestion
These are the changes I made. I am new to generics, so please forgive and correct any mistakes; the modified code is included with this answer and might help someone.
Added a DisplayName annotation on the entity model to map to the Excel column
name, so that column names with spaces can also be handled.
"T objT" was declared outside of the for loop, which caused
the same value to be inserted into the List repeatedly; fixed it by
instantiating inside the loop, i.e. using "new T()".
Fixed the column out-of-range error by using "ws.Dimension.Columns" to get the column count instead of ws.Cells.Count(), which caused the
out-of-range column error.
When looping through the row data, added +1 to the start row, because rowNum = 1 was also reading the header names; hence the minor change to "rowNum = fromRow + 1".
Here I am sharing how you can read the Excel file. You can modify it to store each value in DataTables.
// Prints every cell of the used range of a worksheet to the console, one line per cell,
// as " Row:<r> column:<c> Value:<trimmed text>". Empty cells are printed with an empty
// value; an empty worksheet prints nothing.
public void readXLS(string FilePath)
{
    FileInfo existingFile = new FileInfo(FilePath);
    using (ExcelPackage package = new ExcelPackage(existingFile))
    {
        // NOTE(review): index 1 is kept from the original; whether it addresses the first
        // sheet depends on the index base of the EPPlus version in use - confirm.
        ExcelWorksheet worksheet = package.Workbook.Worksheets[1];

        // Dimension is null when the worksheet has no cells at all.
        if (worksheet.Dimension == null)
        {
            return;
        }

        int colCount = worksheet.Dimension.End.Column; // last used column
        int rowCount = worksheet.Dimension.End.Row;    // last used row

        for (int row = 1; row <= rowCount; row++)
        {
            for (int col = 1; col <= colCount; col++)
            {
                // Value is null for an empty cell, so it must not be dereferenced blindly.
                object cellValue = worksheet.Cells[row, col].Value;
                string text = cellValue == null ? "" : cellValue.ToString().Trim();

                // You can update the code here to add each cell value to a DataTable.
                Console.WriteLine(" Row:" + row + " column:" + col + " Value:" + text);
            }
        }
    }
}
Ref: http://sforsuresh.in/read-data-excel-sheet-insert-database-table-c/
Use the code below if you want to read the data of each worksheet of an Excel file, including worksheets that contain date-formatted data in a particular column.
// Reads every worksheet of an Excel file into a DataSet: one DataTable of strings per
// worksheet, named after the worksheet.
//
// filePath:         location of the workbook.
// isFirstRowHeader: true  - row 1 supplies the column names and data starts at row 2;
//                   false - columns are named "Column N" and data starts at row 1.
// Returns: the DataSet. An exception raised while reading the sheets is written to the
//          console and the tables read so far are returned (best effort).
public static DataSet ReadExcelFileToDataSet2(string filePath, bool isFirstRowHeader=true)
{
    DataSet result = new DataSet();

    // "Excel" is an alias for the EPPlus namespace (using Excel = OfficeOpenXml;).
    // The using statement releases the package once reading has finished.
    using (Excel.ExcelPackage xlsPackage = new Excel.ExcelPackage(new FileInfo(filePath)))
    {
        Excel.ExcelWorkbook workBook = xlsPackage.Workbook;
        try
        {
            for (int count = 1; count <= workBook.Worksheets.Count; count++)
            {
                Excel.ExcelWorksheet wsworkSheet = workBook.Worksheets[count];

                // Compare ignoring case: lower-casing the name and comparing it with a
                // mixed-case literal could never match.
                if (string.Equals(wsworkSheet.Name, "sheetName", StringComparison.OrdinalIgnoreCase))
                {
                    // Have column 4 of this sheet rendered with a date number format.
                    wsworkSheet.Column(4).Style.Numberformat.Format = "MM-dd-yyyy";
                }

                DataTable tbl = new DataTable();
                tbl.TableName = wsworkSheet.Name;

                // Dimension is the used range of the sheet (e.g. A1:N7) and is null for an
                // empty sheet. An empty sheet becomes an empty table and must not stop the
                // remaining sheets from being read.
                if (wsworkSheet.Dimension == null)
                {
                    result.Tables.Add(tbl);
                    continue;
                }

                int lastColumn = wsworkSheet.Dimension.End.Column;
                int lastRow = wsworkSheet.Dimension.End.Row;

                // Cells[rowStart, columnStart, rowEnd, columnEnd]: the first row.
                foreach (var firstRowCell in wsworkSheet.Cells[1, 1, 1, lastColumn])
                {
                    // Start.Column is the 1-based column index of the cell.
                    tbl.Columns.Add(isFirstRowHeader ? firstRowCell.Text : string.Format("Column {0}", firstRowCell.Start.Column));
                }

                var startRow = isFirstRowHeader ? 2 : 1;
                for (int rowNum = startRow; rowNum <= lastRow; rowNum++)
                {
                    var wsRow = wsworkSheet.Cells[rowNum, 1, rowNum, lastColumn];
                    DataRow row = tbl.Rows.Add();
                    foreach (var cell in wsRow)
                    {
                        // Worksheet columns are 1-based, DataRow indexes are 0-based.
                        row[cell.Start.Column - 1] = cell.Text;
                    }
                }

                result.Tables.Add(tbl);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }

    return result;
}