c# working on excel files with large data - c#

I'm copying data from first sheet of different excel files to a single workbook. I already have tried it with different alternatives like npoi, spire.xls and Interop which works good, but it kills too much of my time. It would really be thankful if anyone can suggest me with a better one. Been through many forms on the web, but couldn't find.
FYI: Each of My files are more than 50 MB in size. A few being 10 MB or less.
This is one of which I have tried (Uses Spire.xls):
workbook = new Workbook();
//laod first file
workbook.LoadFromFile(names[0]);
//load the remaining files starting with second file
for (int i = 1; i < cnt; i++)
{
LoadFIle(names[i]);
//merge the loaded file immediately and than load next file
MergeData();
}
private void LoadFIle(string filePath)
{
//load other workbooks starting with 2nd workbbook
tempbook = new Workbook();
tempbook.LoadFromFile(filePath);
}
private void MergeData()
{
try
{
int c1 = workbook.ActiveSheet.LastRow, c2 = tempbook.Worksheets[0].LastRow;
//check if you have exceeded 1st sheet limit
if ((c1 + c2) <= 1048575)
{
//import the second workbook's worksheet into the first workbook using a datatable
//load 1st sheet of tempbook into sheet
Worksheet sheet = tempbook.Worksheets[0];
//copy data from sheet into a datatable
DataTable dataTable = sheet.ExportDataTable();
//load sheet1
Worksheet sheet1 = workbook.Worksheets[workbook.ActiveSheetIndex];
sheet1.InsertDataTable(dataTable, false, sheet1.LastRow + 1, 1);
}
else if ((c1 >= 1048575 && c2 >= 1048575) || c1 >= 1048575 || c2 >= 1048575 || (c1 + c2) >= 1048575)
{
workbook.Worksheets.AddCopy(tempbook.Worksheets[0]);
indx = workbook.ActiveSheet.Index;
workbook.ActiveSheetIndex = ++indx;
}
else
{
//import the second workbook's worksheet into the first workbook using a datatable
//load 1st sheet of tempbook into sheet
Worksheet sheet = tempbook.Worksheets[0];
//copy data from sheet into a datatable
DataTable dataTable = sheet.ExportDataTable();
//load sheet1
Worksheet sheet1 = workbook.Worksheets[workbook.ActiveSheetIndex];
sheet1.InsertDataTable(dataTable, false, sheet1.LastRow + 1, 1);
}
}
catch (IndexOutOfRangeException)
{
}
}
}
Well, this works good but as said takes a long time. Any suggestions are welcome. Thanks in advance.

Here is my (fastest I know of) implementation using Excel interop. Although I looked carefully to release all (must have missed one), 2 Excel instances remain in the processes list, they are closed after the program ends.
The key is to only have 2 Open Excel instances and to copy the data as a Block using Range.Value2.
//Helper function to cleanup
public void ReleaseObject(object obj)
{
if (obj != null && Marshal.IsComObject(obj))
{
Marshal.ReleaseComObject(obj);
}
}
public void CopyIntoOne(List<string> pSourceFiles, string pDestinationFile)
{
var sourceExcelApp = new Microsoft.Office.Interop.Excel.Application();
var destinationExcelApp = new Microsoft.Office.Interop.Excel.Application();
// TODO: Check if it exists
destinationExcelApp.Workbooks.Open(pDestinationFile);
// for debug
//destinationExcelApp.Visible = true;
//sourceExcelApp.Visible = true;
int i = 0;
var sheets = destinationExcelApp.ActiveWorkbook.Sheets;
var lastsheet = destinationExcelApp.ActiveWorkbook.Sheets[sheets.Count];
ReleaseObject(sheets);
foreach (var srcFile in pSourceFiles)
{
sourceExcelApp.Workbooks.Open(srcFile);
// get extends
var lastRow = sourceExcelApp.ActiveSheet.Cells.Find("*", System.Reflection.Missing.Value,
System.Reflection.Missing.Value, System.Reflection.Missing.Value, XlSearchOrder.xlByRows,
XlSearchDirection.xlPrevious, false, System.Reflection.Missing.Value, System.Reflection.Missing.Value);
var lastCol = sourceExcelApp.ActiveSheet.Cells.Find("*", System.Reflection.Missing.Value, System.Reflection.Missing.Value,
System.Reflection.Missing.Value, XlSearchOrder.xlByColumns, XlSearchDirection.xlPrevious, false,
System.Reflection.Missing.Value, System.Reflection.Missing.Value);
var startCell = (Range) sourceExcelApp.ActiveWorkbook.ActiveSheet.Cells[1, 1];
var endCell = (Range) sourceExcelApp.ActiveWorkbook.ActiveSheet.Cells[lastRow.Row, lastCol.Column];
var myRange = sourceExcelApp.ActiveWorkbook.ActiveSheet.Range[startCell, endCell];
// copy the values
var value = myRange.Value2;
// create sheet in new Workbook at the end
Worksheet newSheet = destinationExcelApp.ActiveWorkbook.Sheets.Add(After: lastsheet);
ReleaseObject(lastsheet);
lastsheet = newSheet;
//its even faster when adding it at the front
//Worksheet newSheet = destinationExcelApp.ActiveWorkbook.Sheets.Add();
// change that to a good name
newSheet.Name = ++i + "";
var dstStartCell = (Range) destinationExcelApp.ActiveWorkbook.ActiveSheet.Cells[1, 1];
var dstEndCell = (Range) destinationExcelApp.ActiveWorkbook.ActiveSheet.Cells[lastRow.Row, lastCol.Column];
var dstRange = destinationExcelApp.ActiveWorkbook.ActiveSheet.Range[dstStartCell, dstEndCell];
// this is the actual paste
dstRange.Value2 = value;
//cleanup
ReleaseObject(startCell);
ReleaseObject(endCell);
ReleaseObject(myRange);
ReleaseObject(value);// cannot hurt, but not necessary since its a simple array
ReleaseObject(dstStartCell);
ReleaseObject(dstEndCell);
ReleaseObject(dstRange);
ReleaseObject(newSheet);
ReleaseObject(lastRow);
ReleaseObject(lastCol);
sourceExcelApp.ActiveWorkbook.Close(false);
}
ReleaseObject(lastsheet);
sourceExcelApp.Quit();
ReleaseObject(sourceExcelApp);
destinationExcelApp.ActiveWorkbook.Save();
destinationExcelApp.Quit();
ReleaseObject(destinationExcelApp);
destinationExcelApp = null;
sourceExcelApp = null;
}
I have tested it on small excel files and are curious how it behaves with larger files.

Related

How to iterate excel(.xlsm) sheet rows and columns using interop(c#.Net)

I have 5 columns(Name,department,Sales Details,Address in sheet(.xlsm) i have to fill this sheet using class object using for loop and fill the sheet after filling this sheet,same object data some information need to fill sheet2 .I am new for excel sheet operations and interop as well, can some one let me know how to fill this data and programmatically how to switch to next sheet to fill the remaining data in sheet2.
for(int masterValues = 0; masterValues < frequencyValues.Count(); masterValues++)
{ MasterDataHeader name= new MasterDataHeader();
masterDataHeader.department = Kvp[0].ToString();
masterDataHeader.salesdetails= Kvp[1].ToString();
masterDataHeader.adress = Kvp[2].ToString();
masterDataHeader.Gebaude_art = Kvp[3].ToString();
masterdtaheader.frequencyinfo=kvp[4]//need to update sheet2
}
You must have Microsoft Office Excel 2007, or later versions, installed on your computer or on your server where this code will be executed.
Add reference to Assemblies -> Extensions ->Microsoft.Office.Interop.Excel.
If you do not see the assemblies, see How to: Install Office Primary Interop Assemblies.
Here simple example how to add new workbook, add two new sheets, add data to those sheets, save the workbook and quit excel.
using MSExcel = Microsoft.Office.Interop.Excel;
public static void FillExcel(IEnumerable<MasterDataHeader> data)
{
MSExcel.Application excel = new MSExcel.Application { Visible = true };
MSExcel.Workbook workbook = excel.Workbooks.Add();
MSExcel.Worksheet sheet1 = workbook.Worksheets.Add();
MSExcel.Worksheet sheet2 = workbook.Worksheets.Add();
MasterDataHeader[] masterDataHeaders = data as MasterDataHeader[] ?? data.ToArray();
int row = 0;
foreach (var item in masterDataHeaders)
{
row++;
sheet1.Cells[row, 1] = item.Name;
sheet1.Cells[row, 2] = item.Department;
sheet1.Cells[row, 3] = item.SalesDetails;
sheet1.Cells[row, 4] = item.Adress;
}
row = 0;
foreach (var item in masterDataHeaders)
{
row++;
sheet2.Cells[row, 1] = item.GebaudeArt;
sheet2.Cells[row, 2] = item.Frequencyinfo;
}
workbook.Save();
excel.Quit();
}
How to search for a employee number in sheet1 and to find just the first occurrence
object missing = System.Type.Missing;
const int employeeNumber = 16845374;
MSExcel.Range employeeNumberFound = null;
employeeNumberFound = sheet1.UsedRange.Find(employeeNumber, missing,
MSExcel.XlFindLookIn.xlValues, MSExcel.XlLookAt.xlPart,
MSExcel.XlSearchOrder.xlByRows, MSExcel.XlSearchDirection.xlNext, false,
missing, missing);
if (employeeNumberFound != null)
{
Debug.WriteLine($"Employee number '{employeeNumber}' was found in cell '[{employeeNumberFound.Row},{employeeNumberFound.Column}]'");
}
else
{
Debug.WriteLine($"Employee number '{employeeNumber}' was not found");
}
For more information look e.g. here.
Get last row in table2 and insert new row below it. Last row remains 32 in case when table2 is empty. And new row will not be incremented in case when table2 is empty.
var lastRowAsRange = sheet1.Cells[32, 2].CurrentRegion.Columns[1].Cells.Find("*", missing, missing, missing,
XlSearchOrder.xlByRows, XlSearchDirection.xlPrevious, false, missing, missing);
var lastRow = lastRowAsRange != null
? lastRowAsRange.Row
: 32;
var newRow = lastRowAsRange != null
? lastRow + 1
: lastRow;
bool inserted = sheet1.Rows[newRow].Insert(XlDirection.xlDown, XlInsertFormatOrigin.xlFormatFromLeftOrAbove);
if (inserted)
{
sheet1.Cells[newRow, 1] = "IT";
sheet1.Cells[newRow, 2] = "100";
sheet1.Cells[newRow, 3] = "hyd";
}

DataGridViewS export to excel sheetS

I want to export all my DataGridViews in one Excell document.
For every DataGridView there shoud be a own sheet in the Excell File.
But with my code i only recive the Error: System.Runtime.InteropServices.COMException: HRESULT: 0x800A03EC"
I think there is something wrong with my parameters.
private void exportToExcellButton_Click(object sender, EventArgs e)
{
SaveFileDialog saveFileD = new SaveFileDialog();
string fileName = truckListBox.SelectedItem.ToString() + "__" + DateTime.Now.ToShortDateString();
saveFileD.InitialDirectory = #"C:/TML/";
saveFileD.FileName = fileName;
if (!Directory.Exists(#"C:/TML/"))
Directory.CreateDirectory(#"C:/TML/");
List<DataGridView> dataGridViews = getAllDataGridViews();
Microsoft.Office.Interop.Excel.Application app;
Microsoft.Office.Interop.Excel.Workbook book;
Microsoft.Office.Interop.Excel.Worksheet sheet;
app = new Excel.Application();
app.Visible = true;
book = app.Workbooks.Add(System.Reflection.Missing.Value);
foreach (var grid in dataGridViews)
{
int count = book.Worksheets.Count;
sheet = (Worksheet)book.Sheets.Add(Type.Missing, book.Worksheets[count], Type.Missing, Type.Missing);
sheet.Name = grid.Name.ToString().Remove(0, 13);
int cMin = 0, rMin = 0;
int c = cMin, r = rMin;
// Set Headers
foreach (DataGridViewColumn column in grid.Columns)
{
//Here appears the Error: System.Runtime.InteropServices.COMException: HRESULT: 0x800A03EC"
sheet.Cells[r, c] = column.HeaderText;
c++;
}
sheet.Range[sheet.Cells[r, cMin], sheet.Cells[r, c]].Font.Bold = true;
sheet.Range[sheet.Cells[r, cMin], sheet.Cells[r, c]].VerticalAlignment = Microsoft.Office.Interop.Excel.XlVAlign.xlVAlignCenter;
// Set Rows
foreach (DataGridViewRow row in grid.Rows)
{
r++;
c = cMin;
// Set Cells
foreach (DataGridViewCell item in row.Cells)
{
sheet.Cells[r, c++] = item.Value;
}
}
}
book.Save();
book.Close();
app.Quit();
}
Spended allready days into it and cant get it work.
Thx for your Help!
EDIT: Fixed one error to get to the new one.
There are a few problems you may have with the posted code. Therefore, I will break them down.
For starters, it appears you are using a SaveFileDialog however I do not see where it is being used. The code sets the InitalDirectory and FileName, but it is never used. This is not that important as a dialog is not really needed, however the way the code is getting the file name is going to have some problems. The line of code…
string fileName = truckListBox.SelectedItem.ToString() + "__" + DateTime.Now.ToShortDateString();
is going to have problems if you try to save the file name because the string returned from DateTime.Now.ToShortDateString() is going to be in a format like “2019\11\26”… Obviously the “\” characters are going to be interpreted as a path (folder) and will most likely fail when the code tries to save the file. Creating a method that returns a string that uses some other character should be easy to fix this.
Next is the fact that Excel files are NOT zero based on their rows and columns. Therefore, setting the initial Excel row column variables (int c = 0, r = 0;) will fail on the first try. These values should be one (1).
Another problem is on the line…
book.Save();
Is most likely going to save the file to the users “Documents” folder using the file name of “Book1.xlsx.” When saving the file you need to supply the complete path and file name which as stated earlier does not appear to be used.
Lastly, anytime you use “COM” objects such as Excel apps, workbooks and worksheets, it is very important for the code to “RELEASE” the com objects your code creates before you exit the program. In the current posted code, it is highly likely that there are lingering “Excel” resources still running. Therefore, to avoid leaking resources, it is important for your code to release the com objects it creates.
In my example below the code to release the resources is in the finally clause of the try/catch/finally statement.
private void button1_Click(object sender, EventArgs e) {
//SaveFileDialog saveFileD = new SaveFileDialog();
//string fileName = truckListBox.SelectedItem.ToString() + "__" + DateTime.Now.ToShortDateString();
string fileName = #"C:\Users\John\Desktop\Grr\TestExcelFile" + "__" + DateTime.Now.Year + "_" + DateTime.Now.Month;
//saveFileD.InitialDirectory = #"C:\Users\John\Desktop\Grr\";
//saveFileD.FileName = fileName;
//if (!Directory.Exists(#"C:/TML/"))
// Directory.CreateDirectory(#"C:/TML/");
//List<DataGridView> dataGridViews = getAllDataGridViews();
List<DataGridView> dataGridViews = getGrids();
Microsoft.Office.Interop.Excel.Application app = null;
Microsoft.Office.Interop.Excel.Workbook book = null;
Microsoft.Office.Interop.Excel.Worksheet sheet = null;
app = new Microsoft.Office.Interop.Excel.Application();
app.Visible = true;
book = app.Workbooks.Add(System.Reflection.Missing.Value);
try {
foreach (var grid in dataGridViews) {
int count = book.Worksheets.Count;
//sheet = (Microsoft.Office.Interop.Excel.Worksheet)book.Sheets.Add(Type.Missing, book.Worksheets[count], Type.Missing, Type.Missing);
sheet = (Microsoft.Office.Interop.Excel.Worksheet)book.Worksheets.Add();
//sheet.Name = grid.Name.ToString().Remove(0, 13);
sheet.Name = grid.Name.ToString();
int cMin = 1, rMin = 1;
int c = cMin, r = rMin;
// Set Headers
foreach (DataGridViewColumn column in grid.Columns) {
//Here appears the Error: System.Runtime.InteropServices.COMException: HRESULT: 0x800A03EC"
sheet.Cells[r, c] = column.HeaderText;
c++;
}
sheet.Range[sheet.Cells[r, cMin], sheet.Cells[r, c]].Font.Bold = true;
sheet.Range[sheet.Cells[r, cMin], sheet.Cells[r, c]].VerticalAlignment = Microsoft.Office.Interop.Excel.XlVAlign.xlVAlignCenter;
// Set Rows
foreach (DataGridViewRow row in grid.Rows) {
r++;
c = cMin;
// Set Cells
foreach (DataGridViewCell item in row.Cells) {
sheet.Cells[r, c++] = item.Value;
}
}
}
book.SaveAs(fileName, Type.Missing, Type.Missing, Type.Missing);
book.Close();
app.Quit();
}
catch (Exception ex) {
MessageBox.Show("Error writing to excel: " + ex.Message);
}
finally {
if (sheet != null)
Marshal.ReleaseComObject(sheet);
if (book != null)
Marshal.ReleaseComObject(book);
if (app != null)
Marshal.ReleaseComObject(app);
}
}
Hope this helps.
Simply Make a method and pass DataGridView
using Excel = Microsoft.Office.Interop.Excel;
public void ete(DataGridView dgv)//ExportToExcel
{
// Creating a Excel object.
Excel._Application excel = new Excel.Application();
Excel._Workbook workbook = excel.Workbooks.Add(Type.Missing);
Excel._Worksheet worksheet = null;
excel.Columns.ColumnWidth = 20;
try
{
worksheet = workbook.ActiveSheet;
worksheet.Name = "ExportedFromDatGrid";
int cellRowIndex = 1;
int cellColumnIndex = 1;
//Loop through each row and read value from each column.
for (int i = -1; i < dgv.Rows.Count; i++)
{
for (int j = 0; j < dgv.Columns.Count; j++)
{
// Excel index starts from 1,1. As first Row would have the Column headers, adding a condition check.
if (cellRowIndex == 1)
{
worksheet.Cells[cellRowIndex, cellColumnIndex] = dgv.Columns[j].HeaderText;
}
else
{
worksheet.Cells[cellRowIndex, cellColumnIndex] = dgv.Rows[i].Cells[j].Value.ToString();
}
cellColumnIndex++;
}
cellColumnIndex = 1;
cellRowIndex++;
}
//Getting the location and file name of the excel to save from user.
SaveFileDialog saveDialog = new SaveFileDialog();
saveDialog.Filter = "Excel files (*.xlsx)|*.xlsx|All files (*.*)|*.*";
saveDialog.FilterIndex = 2;
if (saveDialog.ShowDialog() == DialogResult.OK)
{
workbook.SaveAs(saveDialog.FileName.ToString());
MessageBox.Show("Export Successful");
}
}
catch (System.Exception ex)
{
MessageBox.Show(ex.Message);
}
finally
{
excel.Quit();
workbook = null;
excel = null;
}
}
Now Call Method
ete(datagridview1);

Add new sheet to Excel workbook to store data when row limit > 1M

I am trying to store some data into an excel file using Spire.Xls.
When copying data from multiple files into the first sheet of another excel file, I want to create a new excel sheet when reaching the row = 1,048,575, and paste the data into this new sheet. Here is my code:
Workbook tempbook = new Workbook();
tempbook.LoadFromFile(PathToSecondFile);
Workbook workbook = new Workbook();
workbook.LoadFromFile(PathToFirstFile);
//import the second workbook's worksheet into the first workbook using a datatable
Worksheet sheet2 = tempbook.Worksheets[0];
//copy data from sheet2 into a datatable
DataTable dataTable = sheet2.ExportDataTable();
//load sheet1
Worksheet sheet1 = workbook.Worksheets[0];
var c1 = sheet1.LastRow;
var c2 = sheet2.LastRow;
if (c1 >= 1048575 || c2 >= 1048575 || (c1 + c2) >= 1048575)
{
//create a new worksheet and append data into it but
//at this line getting Index out of bound exception
Worksheet sheet3 = workbook.Worksheets.Add("NewSheet");
sheet3.InsertDataTable(dataTable, false, sheet3.LastRow + 1, 1);
}
else
{
sheet1.InsertDataTable(dataTable, false, sheet1.LastRow + 1, 1);
}
}
Even if the code surpasses the exception, data is saved into sheet2, but the data to sheet1 is not saved. All suggestions are welcome. Thanks in advance.
Is it possoble to switch to Excel 2007+ format? In this way you can find a lot of providers for excel, for example Apache npoi, Spreadsheet Light.
It was pretty simple actually... A bit workaround did the task. So here's the code:
int c1 = workbook.ActiveSheet.LastRow, c2 = tempbook.Worksheets[0].LastRow;
if ((c1 + c2) <= 1048575)
{
//import the second workbook's worksheet into the first workbook using a datatable
//load 1st sheet of tempbook into sheet
Worksheet sheet = tempbook.Worksheets[0];
//copy data from sheet into a datatable
DataTable dataTable = sheet.ExportDataTable();
//load sheet1
Worksheet sheet1 = workbook.Worksheets[workbook.ActiveSheetIndex];
sheet1.InsertDataTable(dataTable, false, sheet1.LastRow + 1, 1);
}
else if ((c1 >= 1048575 && c2 >= 1048575) || c1 >= 1048575 || c2 >= 1048575 || (c1 + c2) >= 1048575)
{
workbook.Worksheets.AddCopy(tempbook.Worksheets[0]);
indx = workbook.ActiveSheet.Index;
workbook.ActiveSheetIndex = ++indx;
}
else
{
//import the second workbook's worksheet into the first workbook using a datatable
//load 1st sheet of tempbook into sheet
Worksheet sheet = tempbook.Worksheets[0];
//copy data from sheet into a datatable
DataTable dataTable = sheet.ExportDataTable();
//load sheet1
Worksheet sheet1 = workbook.Worksheets[workbook.ActiveSheetIndex];
sheet1.InsertDataTable(dataTable, false, sheet1.LastRow + 1, 1);
}
Obviously, the if and else block code is same. But couldn't think of anything else. Any modification are welcome.

C# How to write each table in a Word file to its own Excel file

I'm trying to write code in C# WinForms that allows a user to select a directory tree, and extract all of the table data from a word document into an excel file. Presently, the code compiles and you can select your directories, etc, but once it begins to iterate through the loop for each table it crashes.
The program successfully opens the first word file and writes the first excel file (table_1_whatever.xlsx) and saves it in the destination folder. However, on the second table in the same file I get this error on this line of code:
worksheet.Cells[row, col] = objExcelApp.WorksheetFunction.Clean(table.Cell(row, col).Range.Text);
System.Runtime.InteropServices.COMException: 'The requested member of the collection does not exist.'
I can't seem to figure out why it doesn't exist. Each time it goes through the foreach loop it should be creating a new worksheet, but it doesn't appear to be working. Any insight, examples, or suggestions are welcome!
Code:
private void WordRunButton_Click(object sender, EventArgs e)
{
var excelApp = new excel.Application();
excel.Workbooks workbooks = excelApp.Workbooks;
var wordApp = new word.Application();
word.Documents documents = wordApp.Documents;
wordApp.Visible = false;
excelApp.Visible = false;
string[] fileDirectories = Directory.GetFiles(WordSourceBox.Text, "*.doc*",
SearchOption.AllDirectories);
foreach (var item in fileDirectories)
{
word._Document document = documents.Open(item);
int tableCount = 1;
foreach (word.Table table in document.Tables)
{
if (table.Cell(1, 1).ToString() != "Doc Level")
{
string wordFile = item;
appendName = Path.GetFileNameWithoutExtension(wordFile) + "_Table_" + tableCount + ".xlsx";
var workbook = excelApp.Workbooks.Add(1);
excel._Worksheet worksheet = (excel.Worksheet)workbook.Sheets[1];
for (int row = 1; row <= table.Rows.Count; row++)
{
for (int col = 1; col <= table.Columns.Count; col++)
{
var cell = table.Cell(row, col);
var range = cell.Range;
var text = range.Text;
var cleaned = excelApp.WorksheetFunction.Clean(text);
worksheet.Cells[row, col] = cleaned;
}
}
workbook.SaveAs(Path.Combine(WordOutputBox.Text, Path.GetFileName(appendName)), excel.XlFileFormat.xlWorkbookDefault);
workbook.Close();
Marshal.ReleaseComObject(workbook);
}
else
{
WordOutputStreamBox.AppendText(String.Format("Table {0} ignored\n", tableCount));
}
WordOutputStreamBox.AppendText(appendName + "\n");
tableCount++;
}
document.Close();
Marshal.ReleaseComObject(document);
WordOutputStreamBox.AppendText(item + "\n");
}
WordOutputStreamBox.AppendText("\nAll files parsed");
excelApp.Application.Quit();
workbooks.Close();
excelApp.Quit();
WordOutputStreamBox.AppendText("\nExcel files closed");
Marshal.ReleaseComObject(workbooks);
Marshal.ReleaseComObject(excelApp);
WordOutputStreamBox.AppendText("\nExcel files released");
wordApp.Application.Quit();
wordApp.Quit();
WordOutputStreamBox.AppendText("\nWord files have been quit");
Marshal.ReleaseComObject(documents);
Marshal.ReleaseComObject(wordApp);
WordOutputStreamBox.AppendText("\nWord files have been released\n");
}
Edit 1:(Sorry for posting in the wrong place the first time!)
Ok, so the problem has been isolated...
The code logic of the code was fine, and the table was in fact there. The issue is that the second table of these files has a set of split cells in it, so, when it reaches the cell that contains it, the program crashes.
As a temp fix, I have just set it to ignore the table if the header == whatever. Does anyone know of a solution that actually allows to extract this data though?

Using assertions to compare two excel files

Im using Visual Studio to create an automated test that creates two excel sheets. As a final check, I need to compare the content of these two excel sheets and ensure that they are equal. Is there any way to do this with assertions?
Something like Assert.AreEqual(file1, file2);?
Any help or guidance would be appreciated!
Thanks to Mangist for guidance on this. Ive written the following to compare two excel files:
public bool compareFiles(string filePath1, string filePath2)
{
bool result = false;
Excel.Application excel = new Excel.Application();
//Open files to compare
Excel.Workbook workbook1 = excel.Workbooks.Open(filePath1);
Excel.Workbook workbook2 = excel.Workbooks.Open(filePath2);
//Open sheets to grab values from
Excel.Worksheet worksheet1 = (Excel.Worksheet)workbook1.Sheets[1];
Excel.Worksheet worksheet2 = (Excel.Worksheet)workbook2.Sheets[1];
//Get the used range of cells
Excel.Range range = worksheet2.UsedRange;
int maxColumns = range.Columns.Count;
int maxRows = range.Rows.Count;
//Check that each cell matches
for (int i = 1; i <= maxColumns; i++)
{
for (int j = 1; j <= maxRows; j++)
{
if (worksheet1.Cells[j, i].Value == worksheet2.Cells[j, i].Value)
{
result = true;
}
else
result = false;
}
}
//Close the workbooks
GC.Collect();
GC.WaitForPendingFinalizers();
Marshal.ReleaseComObject(range);
Marshal.ReleaseComObject(worksheet1);
Marshal.ReleaseComObject(worksheet2);
workbook1.Close();
workbook2.Close();
excel.Quit();
Marshal.ReleaseComObject(excel);
//Tell us if it is true or false
return result;
}
And using an assertion to check result:
Assert.IsTrue(compareFiles(testFile, compareFile), "Output files do not match.");
Can you convert the expected/actual Excel sheets to a text format, such as CSV?
If so, you could use Approval Tests instead. This allows you to have a text file as your "expected" test result. When tests fail it can show you the actual result of the test, diff'd against the expected result.
Screenshot taken from this review of Approval Tests.
One Option will be using Open source library called as EPPlus. You can download and refer it in you automated test application. EPPlus gives you multiple ways to read an excel file and compare. One such option is C# Datatable. Here is a sample code..
public static DataTable GetDataTableFromExcel(string path, bool hasHeader = true)
{
using (var pck = new OfficeOpenXml.ExcelPackage())
{
using (var stream = File.OpenRead(path))
{
pck.Load(stream);
}
var ws = pck.Workbook.Worksheets.First();
DataTable tbl = new DataTable();
foreach (var firstRowCell in ws.Cells[1, 1, 1, ws.Dimension.End.Column])
{
tbl.Columns.Add(hasHeader ? firstRowCell.Text : string.Format("Column {0}", firstRowCell.Start.Column));
}
var startRow = hasHeader ? 2 : 1;
for (int rowNum = startRow; rowNum <= ws.Dimension.End.Row; rowNum++)
{
var wsRow = ws.Cells[rowNum, 1, rowNum, ws.Dimension.End.Column];
DataRow row = tbl.Rows.Add();
foreach (var cell in wsRow)
{
row[cell.Start.Column - 1] = cell.Text;
}
}
return tbl;
}
}
For both the Excel files , same process can be adopted, and it will give you the desired results.

Categories

Resources