I know questions of this kind are asked from time to time, but I can't find any satisfying solution.
How can I open a CSV-File using MS ACE OLEDB 12?
I try it with the following code.
// Reads every row returned by the query through the ACE text driver and prints each
// field wrapped in parentheses, one console line per row.
// The using blocks release the reader, command and connection even when Open or
// ExecuteReader throws.
using (DbConnection connection = new OleDbConnection())
{
    connection.ConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=C:\\Documents;Extended Properties=\"Text;HDR=Yes\"";
    connection.Open();
    using (DbCommand cmd = connection.CreateCommand())
    {
        // NOTE(review): [Mappe1#csv] presumably addresses Mappe1.csv inside the Data Source folder - confirm.
        cmd.CommandText = "SELECT * FROM [Mappe1#csv]";
        using (DbDataReader reader = cmd.ExecuteReader())
        {
            while (reader.Read())
            {
                for (int i = 0; i < reader.FieldCount; i++)
                {
                    Console.Write("(" + reader.GetValue(i).ToString() + ")");
                }
                Console.WriteLine();
            }
        }
    }
}
Console.WriteLine("Done");
Console.ReadKey();
The problem is that only one column is found. The text is delimited by ';'. Even when I specify the delimiter with "Delimited(|)", for example, it does not work.
I can't find any documentation for this provider...
This helped me getting a semicolon-delimited csv to parse in C# using ACE.OLEDB.12.0:
http://sqlserverpedia.com/blog/sql-server-bloggers/use-ace-drivers-and-powershell-to-talk-to-text-files/:
Create a schema.ini text file in the same directory as the csv file you want to import with the following contents:
[fileIwantToImport.csv]
Format=Delimited(;)
ColNameHeader=True
Worked for me. But so yucky.
Seems like the FORMAT=Delimited(;) in the connection string has gone out of fashion...
Try:
// Text-driver connection string whose Extended Properties carry FORMAT=Delimited in addition to Text and HDR=Yes.
connection.ConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=C:\\Documents;Extended Properties=\"Text;HDR=Yes;FORMAT=Delimited\"";
(inserting "FORMAT=Delimited" into the extended properties of the connection string...)
Have you considered creating a DataSet?
/// <summary>
/// Loads a delimited text file into a new <see cref="DataSet"/> that holds a single table.
/// The first line of the file supplies the column names; every following non-empty line
/// becomes one row.
/// </summary>
/// <param name="File">Path of the file to read.</param>
/// <param name="TableName">Name given to the table that receives the data.</param>
/// <param name="delimiter">Every character of this string is treated as a field separator.</param>
/// <returns>
/// The populated data set. When the file cannot be opened, or its header line cannot be read,
/// a message box is shown and the data set is returned without any table.
/// </returns>
/// <remarks>
/// Duplicate header names are made unique by appending "_1", "_2", and so on.
/// Fields beyond the number of header columns are ignored; missing trailing fields stay null.
/// </remarks>
public static DataSet ConvertTabFiles(string File, string TableName, string delimiter)
{
    //The DataSet to Return
    DataSet result = new DataSet();

    //Open the file in a stream reader.
    StreamReader s;
    try
    {
        s = new StreamReader(File);
    }
    catch
    {
        MessageBox.Show("Can't perform operation on file: " + File);
        return result;
    }

    // Dispose the reader on every exit path so the file handle is always released.
    using (s)
    {
        //Split the first line into the columns
        char[] separators = null;
        string[] columns = null;
        try
        {
            separators = delimiter.ToCharArray();
            // ReadLine returns null for an empty file; the resulting exception is reported below.
            columns = s.ReadLine().Split(separators);
        }
        catch
        {
            MessageBox.Show("Can't parse the file " + File + ", please try again!");
            return result;
        }

        //Add the new DataTable to the RecordSet
        DataTable table = result.Tables.Add(TableName);

        //Cycle the columns, adding those that don't exist yet
        //and sequencing the ones that do.
        foreach (string col in columns)
        {
            string columnname = col;
            int i = 0;
            while (table.Columns.Contains(columnname))
            {
                //The name is taken: increment the sequencer and try again.
                i++;
                columnname = col + "_" + i.ToString();
            }
            table.Columns.Add(columnname);
        }

        //Read the rest of the data in the file.
        //RemoveEmptyEntries keeps a "\r\n" pair from producing an empty row between every two lines.
        string[] rows = s.ReadToEnd().Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
        int columnCount = table.Columns.Count;

        //Now add each row to the DataSet
        foreach (string r in rows)
        {
            //Split the row at the delimiter.
            string[] items = r.Split(separators);

            //Pass the individual fields (not the raw line) so each one lands in its own column.
            //Rows.Add rejects arrays longer than the column count, so surplus fields are dropped.
            object[] values = new object[Math.Min(items.Length, columnCount)];
            Array.Copy(items, values, values.Length);
            table.Rows.Add(values);
        }
    }

    //Return the imported data.
    return result;
}
Related
I would like to "clean" a CSV file:
deleting empty rows
deleting empty columns
The rows or columns are not completely empty, they have, for example:
"","","","","","","","","","","","","","",
(in a row form)
OR
"","","","","","","","","","",
(in a row form)
OR
"",
"",
"",
"",
"",
"",
"",
(in a columns form)
These rows or columns can be anywhere in the CSV file.
What I have so far:
/// <summary>
/// Click handler: exports the "Sample" worksheet of the source workbook to a CSV file,
/// then filters empty rows out of that CSV file.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    // '@' marks verbatim string literals, so the backslashes in the paths are taken literally.
    string sourceFile = @"XXXXX.xlsx";
    string worksheetName = "Sample";
    string targetFile = @"C:\Users\xxxx\xls_test\XXXX.csv";

    // Creates the CSV file based on the XLS file
    ExcelToCSVCoversion(sourceFile, worksheetName, targetFile);

    // Manipulate the CSV: Clean empty rows
    DeleteEmptyRoadFromCSV(targetFile);
}
/// <summary>
/// Exports every row of one worksheet of an Excel workbook to a CSV file.
/// </summary>
/// <param name="sourceFile">Path of the Excel workbook to read.</param>
/// <param name="worksheetName">Worksheet name without the trailing '$'.</param>
/// <param name="targetFile">Path of the CSV file to write.</param>
/// <remarks>
/// Each value is wrapped in double quotes and followed by a comma, so every line ends with a
/// trailing comma. Embedded double quotes are not escaped. Any exception is written to the
/// console, after which the method waits for a line of console input.
/// </remarks>
static void ExcelToCSVCoversion(string sourceFile, string worksheetName,
    string targetFile)
{
    string connectionString = @"Provider =Microsoft.ACE.OLEDB.12.0;Data Source=" + sourceFile
        + @";Extended Properties=""Excel 12.0 Xml;HDR=YES""";

    try
    {
        DataTable dataTable = new DataTable();

        // using blocks dispose each object only if it was actually created, so a failure
        // while connecting is reported as itself instead of as a NullReferenceException
        // raised during clean-up.
        using (OleDbConnection connection = new OleDbConnection(connectionString))
        using (OleDbCommand command = new OleDbCommand("SELECT * FROM [" + worksheetName + "$]", connection))
        using (OleDbDataAdapter dataAdapter = new OleDbDataAdapter(command))
        {
            connection.Open();
            command.CommandType = CommandType.Text;
            dataAdapter.Fill(dataTable);
        }

        // The target file is opened only after the worksheet was read successfully.
        using (StreamWriter writer = new StreamWriter(targetFile))
        {
            foreach (DataRow row in dataTable.Rows)
            {
                System.Text.StringBuilder rowString = new System.Text.StringBuilder();
                foreach (DataColumn column in dataTable.Columns)
                {
                    rowString.Append("\"").Append(row[column].ToString()).Append("\",");
                }
                writer.WriteLine(rowString.ToString());
            }
        }

        Console.WriteLine();
        Console.WriteLine("The excel file " + sourceFile + " has been converted " +
            "into " + targetFile + " (CSV format).");
        Console.WriteLine();
    }
    catch (Exception exception)
    {
        Console.WriteLine(exception.ToString());
        Console.ReadLine();
    }
}
/// <summary>
/// Rewrites a CSV file in place, dropping every line whose first or second cell is empty.
/// </summary>
/// <param name="fileName">Path of the CSV file to filter.</param>
/// <remarks>
/// A cell counts as empty when nothing but white space remains after its surrounding double
/// quotes are removed, so the quoted-empty cell <c>""</c> is recognised as empty.
/// Lines are split on every comma; commas inside quoted values are not treated specially.
/// </remarks>
static void DeleteEmptyRoadFromCSV(string fileName)
{
    var nonEmptyLines = File.ReadAllLines(fileName)
        .Where(line => !line.Split(',')
            .Take(2)
            // Strip the quotes before testing: IsNullOrWhiteSpace("\"\"") is false, so a
            // quoted-empty cell would otherwise never be treated as empty.
            // use `All` if you want to ignore only if both columns are empty.
            .Any(cell => string.IsNullOrWhiteSpace(cell.Trim().Trim('"'))))
        .ToList();
    File.WriteAllLines(fileName, nonEmptyLines);
}
Finally, I tried to use the ideas from:
Remove Blank rows from csv c#. But my output is not changing at all.
Any help is welcome!
Thank you.
You could delete columns/rows from table before saving csv.
Method is not tested, but you should get the concept.
/// <summary>
/// Exports one worksheet of an Excel workbook to a CSV file, leaving out rows and columns
/// in which every cell is empty.
/// </summary>
/// <param name="sourceFile">Path of the Excel workbook to read.</param>
/// <param name="worksheetName">Worksheet name without the trailing '$'.</param>
/// <param name="targetFile">Path of the CSV file to write.</param>
/// <remarks>
/// Each value is wrapped in double quotes and followed by a comma, so every line ends with a
/// trailing comma. Embedded double quotes are not escaped. Any exception is written to the
/// console, after which the method waits for a line of console input.
/// </remarks>
static void ExcelToCSVCoversion(string sourceFile, string worksheetName,
    string targetFile)
{
    string connectionString = @"Provider =Microsoft.ACE.OLEDB.12.0;Data Source=" + sourceFile
        + @";Extended Properties=""Excel 12.0 Xml;HDR=YES""";

    try
    {
        DataTable dataTable = new DataTable();

        // using blocks dispose each object only if it was actually created, so a failure
        // while connecting is reported as itself instead of as a NullReferenceException
        // raised during clean-up.
        using (OleDbConnection connection = new OleDbConnection(connectionString))
        using (OleDbCommand command = new OleDbCommand("SELECT * FROM [" + worksheetName + "$]", connection))
        using (OleDbDataAdapter dataAdapter = new OleDbDataAdapter(command))
        {
            connection.Open();
            command.CommandType = CommandType.Text;
            dataAdapter.Fill(dataTable);
        }

        // Snapshot the columns once; the table is modified below.
        var allColumns = dataTable.Columns.Cast<DataColumn>().ToArray();

        // Remove the rows in which every cell is empty.
        var emptyRows = dataTable.Rows.Cast<DataRow>()
            .Where(row => allColumns.All(column => string.IsNullOrEmpty(row[column].ToString())))
            .ToArray();
        foreach (DataRow row in emptyRows)
        {
            dataTable.Rows.Remove(row);
        }

        // Remove the columns that are empty in every remaining row. The row snapshot is
        // taken once rather than once per column.
        var remainingRows = dataTable.Rows.Cast<DataRow>().ToArray();
        var emptyColumns = allColumns
            .Where(column => remainingRows.All(row => string.IsNullOrEmpty(row[column].ToString())))
            .ToArray();
        foreach (DataColumn column in emptyColumns)
        {
            dataTable.Columns.Remove(column);
        }

        // The target file is opened only after the worksheet was read successfully.
        using (StreamWriter writer = new StreamWriter(targetFile))
        {
            foreach (DataRow row in dataTable.Rows)
            {
                System.Text.StringBuilder rowString = new System.Text.StringBuilder();
                foreach (DataColumn column in dataTable.Columns)
                {
                    rowString.Append("\"").Append(row[column].ToString()).Append("\",");
                }
                writer.WriteLine(rowString.ToString());
            }
        }

        Console.WriteLine();
        Console.WriteLine("The excel file " + sourceFile + " has been converted " +
            "into " + targetFile + " (CSV format).");
        Console.WriteLine();
    }
    catch (Exception exception)
    {
        Console.WriteLine(exception.ToString());
        Console.ReadLine();
    }
}
Please check whether the following query works. I am getting all the rows:
// Keeps only the lines whose first two comma-separated cells both hold non-blank text.
// To drop a line only when both of those cells are blank, test with Any(non-blank) instead.
var nonEmptyLines = File.ReadAllLines(FileName)
    .Where(line => line.Split(',')
        .Take(2)
        .All(cell => !string.IsNullOrWhiteSpace(cell)))
    .ToList();
I think you can use something as:
// Skips the leading run of blank lines; every line from the first non-blank one onward is kept.
var nonEmptyLines = File.ReadAllLines(File).
    SkipWhile(line => string.IsNullOrWhiteSpace(line));
I am trying to grab cells in XLS spreadsheets, assign them to string arrays, then manipulate the data and export it to multiple CSV files.
The trouble is the XLS spreadsheet contains information that is not relevant, useable data doesn't start till row 17 and columns have no headings with just the default Sheet1.
I have looked at related questions and tried figuring it out myself with no success. The following code to read the XLS kinda works but is messy to work with as the row lengths vary from one XLS file to another and it is automatically pulling empty columns and rows.
CODE
/// <summary>
/// Reads [Sheet1$] of LoadsAvailable.xls from the current directory and prints, for every
/// row, a running line counter followed by the value of the column at index 28.
/// </summary>
public static void xlsReader()
{
    string fileName = string.Format("{0}\\LoadsAvailable.xls", Directory.GetCurrentDirectory());
    string connectionString = @"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + fileName + ";" + @"Extended Properties='Excel 8.0;HDR=Yes;'";
    string queryString = "SELECT * FROM [Sheet1$]";

    // The command and the reader are disposed together with the connection.
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    using (OleDbCommand command = new OleDbCommand(queryString, connection))
    {
        connection.Open();
        using (OleDbDataReader reader = command.ExecuteReader())
        {
            int counter = 0;
            while (reader.Read())
            {
                Console.WriteLine("Line " + counter + ":" + reader[28].ToString()); // Just for testing
                counter++;
            }
        }
    }
}
I could do a bunch of trickery with loops to get the data that is required but there has to be a query string that could get the data from row 17 with only 8 columns(not 26 columns with 18 empty)?
I have tried many query string examples and cannot seem to get any of them to work with a starting row index or to filter out the empty data.
Here is a handy method that converts an excel file to a flat file.
You may want to change the connection string properties to suit your needs. I needed headers for my case.
Note you will need the Access database engine installed on your machine. I needed the 32 bit version since the app i dev'd was 32 bit. I bet you will also need it.
I parameterized the delimiter for the flat file, because I had cases where I didn't need a comma but a pipe symbol.
How to call method ex: ConvertExcelToFlatFile(openFileName, savePath, '|'); // pipe delimited
// Converts Excel To Flat file
/// <summary>
/// Writes one worksheet of an Excel workbook to a delimited text file.
/// </summary>
/// <param name="excelFilePath">Path of the workbook to read; must exist.</param>
/// <param name="csvOutputFile">Path of the file to create; must not exist yet.</param>
/// <param name="delimeter">Character written between two fields of a row.</param>
/// <param name="worksheetNumber">1-based position of the worksheet in the schema table.</param>
/// <exception cref="FileNotFoundException">The workbook does not exist.</exception>
/// <exception cref="ArgumentException">
/// The output file already exists, or the workbook has fewer worksheets than requested.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="worksheetNumber"/> is below 1.</exception>
/// <remarks>Field values are written as-is: no quoting and no escaping of the delimiter.</remarks>
private void ConvertExcelToFlatFile(string excelFilePath, string csvOutputFile, char delimeter, int worksheetNumber = 1)
{
    if (!File.Exists(excelFilePath)) throw new FileNotFoundException(excelFilePath);
    if (File.Exists(csvOutputFile)) throw new ArgumentException("File exists: " + csvOutputFile);
    if (worksheetNumber < 1) throw new ArgumentOutOfRangeException("worksheetNumber");

    // connection string
    var cnnStr = String.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 12.0 Xml; IMEX=1; HDR=NO\"", excelFilePath);

    // get schema, then data
    var dt = new DataTable();

    // Exceptions propagate unchanged (no catch-and-rethrow, which would reset the stack
    // trace); the using blocks still close and dispose the connection and the adapter.
    using (var cnn = new OleDbConnection(cnnStr))
    {
        cnn.Open();
        var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        if (schemaTable.Rows.Count < worksheetNumber) throw new ArgumentException("The worksheet number provided cannot be found in the spreadsheet");
        string worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"].ToString().Replace("'", "");
        string sql = String.Format("select * from [{0}]", worksheet);
        using (var da = new OleDbDataAdapter(sql, cnn))
        {
            da.Fill(dt);
        }
    }

    // write out CSV data
    using (var wtr = new StreamWriter(csvOutputFile)) // disposes file handle when done
    {
        foreach (DataRow row in dt.Rows)
        {
            // The delimiter goes between fields, so it is skipped before the first one.
            bool firstField = true;
            foreach (DataColumn col in dt.Columns)
            {
                if (firstField)
                {
                    firstField = false;
                }
                else
                {
                    wtr.Write(delimeter);
                }
                wtr.Write(row[col.ColumnName].ToString());
            }
            wtr.WriteLine();
        }
    }
}
I am using the following code to open an excel file (XLS) and populate a DataTable with the first worksheet:
// Looks up the first table name reported by the workbook's schema, then loads that
// table into a DataSet.
var connectionString = string.Format("Provider=Microsoft.Jet.OLEDB.4.0; data source={0}; Extended Properties=Excel 8.0;", filename);
string SheetName;
// The connection is disposed even when opening it or reading the schema fails.
using (OleDbConnection connExcel = new OleDbConnection(connectionString))
{
    connExcel.Open();
    DataTable dtExcelSchema = connExcel.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
    SheetName = dtExcelSchema.Rows[0]["TABLE_NAME"].ToString();
}
var ds = new DataSet();
int count = 0;
// The adapter opens its own connection from the connection string for the duration of Fill.
using (var adapter = new OleDbDataAdapter("SELECT * FROM [" + SheetName + "]", connectionString))
{
    adapter.Fill(ds, SheetName);
}
DataTable dt = ds.Tables[0];
It works only when the file is already open in Ms Excel. Why could that be?
If the file is not open, I get an error message (on line connExcel.Open): External table is not in the expected format.
I'm facing the same problem and accordingly to this site, many developers are struggling for the same:
-When I try read Excel with OLE DB all values are empty
-Can't connect to excel file unless file is already open
Actually I'm using the classic connection string (note that I'm trying to read a 97/2003 file):
Provider=Microsoft.Jet.OLEDB.4.0; Data Source = " + GetFilename(filename) + "; Extended Properties ='Excel 8.0;HDR=NO;IMEX=1'
but the file can be read properly only if:
Is open in Excel or even in Word! (the file of course appears corrupted and unreadable, but then the OleDb procedure can read every line of the file), I didn't try with other Office apps
The file is not in read-only mode
I also tried to lock the file manually or to open it with other non-office applications, but the result is not the same. If I follow the two previous rules (file opened in Word or Excel in not read-only mode) I can see all the cells, otherwise it seems the first column is ignored completely (so F2 became F1, F3 became F2,... and F6, the last one, should became F5 otherwise it throws and out-of-index error).
In order to keep compatibility with OleDb without using 3rd parties libraries I found a very stupid workaround using Microsoft.Office.Interop.Excel assembly.
// Keeps an Excel instance with the workbook open while the OleDb read runs, then shuts
// the instance down again. The outer finally runs even when opening the workbook fails,
// so no Excel process is left behind.
Excel.Application _app = new Excel.Application();
Excel.Workbooks workbooks = null;
try
{
    workbooks = _app.Workbooks;
    workbooks.Open(_filename);
    // OleDb Connection
    using (OleDbConnection conn = new OleDbConnection(connectionOleDb))
    {
        try
        {
            conn.Open();
            using (OleDbCommand cmd = new OleDbCommand())
            {
                cmd.Connection = conn;
                cmd.CommandText = String.Format("SELECT * FROM [{0}$]", tableName);
                using (OleDbDataReader myReader = cmd.ExecuteReader())
                {
                    while (myReader.Read())
                    {
                        //Here I read through all Excel rows
                    }
                }
            }
        }
        catch (Exception E)
        {
            MessageBox.Show("Error!\n" + E.Message);
        }
    }
}
finally
{
    // Check for null before touching the collection, not after.
    if (workbooks != null)
    {
        workbooks.Close();
        System.Runtime.InteropServices.Marshal.ReleaseComObject(workbooks);
    }
    _app.Quit();
    System.Runtime.InteropServices.Marshal.ReleaseComObject(_app);
}
Essentially the first 3 lines run an Excel instance that lasts exactly the time needed to OleDb to perform its tasks.
The last 4 lines, inside the finally block, let the Excel instance to be closed correctly, immediately after the task and avoid ghost Excel processes.
I repeat it's a very stupid workaround that also requires a 1,5 MB dll (Microsoft.Office.Interop.Excel.dll) to be added to the project.
Anyway seems impossible that OleDb cannot manage by itself the missing data...
I had the same problem. If the file was open the read was ok but if the file was closed... some thing was strange... in my case I received strange data from columns and values.. Debugging I found the name of the first sheet and was strange ["xls _xlnm#_FilterDatabase"] looking on the internet I found that's a name of hidden sheet and a trick to avoid read this sheet (HERE) and so I've implemented a method:
/// <summary>
/// Returns the first TABLE_NAME at or after <paramref name="index"/> that does not contain
/// "FilterDatabase", or an empty string when no such row exists.
/// </summary>
/// <param name="dtSheet">Schema table with a TABLE_NAME column.</param>
/// <param name="index">Zero-based row at which the search starts.</param>
private string getFirstVisibileSheet(DataTable dtSheet, int index = 0)
{
    for (int rowIndex = index; rowIndex < dtSheet.Rows.Count; rowIndex++)
    {
        string candidate = dtSheet.Rows[rowIndex]["TABLE_NAME"].ToString();
        if (!candidate.Contains("FilterDatabase"))
        {
            return candidate;
        }
    }
    return String.Empty;
}
It worked very well for me.
My complete example code is:
// Lets the user pick a workbook, loads its first visible sheet into a DataTable and walks
// the rows that have data in both of the first two columns.
string excelFilePath = String.Empty;
string stringConnection = String.Empty;
using (OpenFileDialog openExcelDialog = new OpenFileDialog())
{
    openExcelDialog.Filter = "Excel 2007 (*.xlsx)|*.xlsx|Excel 2003 (*.xls)|*.xls";
    openExcelDialog.FilterIndex = 1;
    openExcelDialog.RestoreDirectory = true;
    DialogResult windowsResult = openExcelDialog.ShowDialog();
    if (windowsResult != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }
    excelFilePath = openExcelDialog.FileName;
    using (DataTable dt = new DataTable())
    {
        try
        {
            if (!excelFilePath.Equals(String.Empty))
            {
                stringConnection = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + excelFilePath + ";Extended Properties='Excel 8.0; HDR=YES;';";
                // Command and adapter are disposed together with the connection.
                using (OleDbConnection conn = new OleDbConnection(stringConnection))
                using (OleDbCommand cmd = new OleDbCommand())
                using (OleDbDataAdapter da = new OleDbDataAdapter(cmd))
                {
                    conn.Open();
                    cmd.Connection = conn;
                    DataTable dtSheet = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
                    string sheetName = getFirstVisibileSheet(dtSheet);
                    cmd.CommandText = "SELECT * FROM [" + sheetName + "]";
                    dt.TableName = sheetName;
                    da.Fill(dt);
                }
            }
            //Read and Use my DT
            // Every row has exactly one item per column, so the column count is read once
            // instead of copying ItemArray for each check.
            int columnCount = dt.Columns.Count;
            foreach (DataRow row in dt.Rows)
            {
                //On my case I need data on first and second Columns
                // A missing value is DBNull, whose ToString() is "", so the white-space
                // test also covers it.
                if (columnCount < 2
                    || String.IsNullOrWhiteSpace(row[0].ToString())
                    || String.IsNullOrWhiteSpace(row[1].ToString()))
                {
                    continue;
                }
                //Get the number from the first COL
                int colOneNumber = 0;
                Int32.TryParse(row[0].ToString(), out colOneNumber);
                //Get the string from the second COL
                string colTwoString = row[1].ToString();
                //Get the string from third COL if is a file path valid
                string colThree = (columnCount >= 3
                    && !row.IsNull(2)
                    && !String.IsNullOrWhiteSpace(row[2].ToString())
                    && File.Exists(row[2].ToString())
                    ) ? row[2].ToString() : String.Empty;
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show("Import error.\n" + ex.Message, "::ERROR::", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
}
/// <summary>
/// Scans the schema rows from <paramref name="index"/> onward and returns the first
/// TABLE_NAME that does not contain "FilterDatabase"; returns an empty string when the
/// scan runs off the end of the table.
/// </summary>
/// <param name="dtSheet">Schema table with a TABLE_NAME column.</param>
/// <param name="index">Zero-based row at which the scan starts.</param>
private string getFirstVisibileSheet(DataTable dtSheet, int index = 0)
{
    int position = index;
    while (position < dtSheet.Rows.Count)
    {
        string name = dtSheet.Rows[position]["TABLE_NAME"].ToString();
        if (name.Contains("FilterDatabase"))
        {
            position++;
            continue;
        }
        return name;
    }
    return String.Empty;
}
Is it failing on ToString(), like here?
Error is "Object reference not set to an instance of an object"
Does Convert.ToString() fix anything?
I am facing very strange issue. I have written class to which reads dbf file through oledb connection. I have downloaded dbf file from internet and it is reading all data correctly.
DBF file location: E:\Projects\SLAVE.DBF
I am facing following 2 issues
1) When I try to read other dbf file then it is reading only its table fields. it is not reading table fields data.
E:\Projects\line75.dbf
2) The other issue I am facing I have DBF files when I put these files in location then i am getting exception that
microsoft jet database engine does not find required object. Are you
missing some directive or path. E:\Projects\SDW_plnParcel.dbf
I am totally confused why it is reading SLAVE.DBF downloaded from internet correct, why it is not reading TABLE FIELDS DATA of line75.dbf and why it is throwing exception on SDW_plnParcel.dbf.
My class and one function for this class is as follows:
/// <summary>
/// Reads values from a .dbf file through the Jet OLE DB provider.
/// Call <see cref="initconnection"/> first, then <see cref="getcolumnvalues"/>;
/// dispose the handler to release the connection.
/// </summary>
public class dbfHandler : IDisposable
{
    private String filename;
    private DataTable dbfTable;
    private OleDbConnection dbConnection;

    public dbfHandler()
    {
        this.dbfTable = new DataTable();
    }

    /// <summary>
    /// Opens a connection on the folder that contains the given file and remembers the
    /// file name as the table to query.
    /// </summary>
    /// <param name="filepath">Path of the .dbf file.</param>
    public void initconnection(String filepath) // initialise dbconnection
    {
        // Path helpers split off only the last segment; filtering the segments by value
        // would also drop a folder that happens to carry the same name as the file.
        this.filename = System.IO.Path.GetFileName(filepath);
        String folderPath = System.IO.Path.GetDirectoryName(filepath);

        // Release a connection left over from an earlier call.
        if (this.dbConnection != null)
        {
            this.dbConnection.Dispose();
        }
        this.dbConnection = new OleDbConnection(@"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + folderPath + ";Extended Properties=dBase III");
        this.dbConnection.Open();
    }

    /// <summary>
    /// Returns the text of one field for the requested rows, in file order.
    /// </summary>
    /// <param name="fieldIndex">Zero-based index of the field to read.</param>
    /// <param name="rowIndexes">Zero-based indexes of the rows to include.</param>
    /// <returns>The matching values; empty when no connection is open.</returns>
    public List<String> getcolumnvalues(int fieldIndex, List<int> rowIndexes)
    {
        List<String> columnvalues = new List<string>();
        if (this.dbConnection == null || this.dbConnection.State != ConnectionState.Open)
        {
            return columnvalues;
        }

        // Set lookup instead of scanning the whole list for every row read.
        HashSet<int> wantedRows = new HashSet<int>(rowIndexes);

        // Brackets keep a file name with spaces or other special characters a single identifier.
        string mySQL = "select * from [" + this.filename + "]"; // dbf table name

        // Exceptions propagate unchanged (no catch-and-rethrow, which would reset the
        // stack trace); command and reader are disposed on every path.
        using (OleDbCommand MyQuery = new OleDbCommand(mySQL, this.dbConnection))
        using (OleDbDataReader reader = MyQuery.ExecuteReader())
        {
            int rowCount = 0;
            while (reader.Read())
            {
                if (wantedRows.Contains(rowCount))
                {
                    columnvalues.Add(reader.GetValue(fieldIndex).ToString());
                }
                rowCount++;
            }
        }
        return columnvalues;
    }

    /// <summary>Closes the connection and releases the table held by this handler.</summary>
    public void Dispose()
    {
        if (this.dbConnection != null)
        {
            this.dbConnection.Dispose();
            this.dbConnection = null;
        }
        if (this.dbfTable != null)
        {
            this.dbfTable.Dispose();
            this.dbfTable = null;
        }
    }
}
When dealing with .DBF files, I have always had better results working with Microsoft's Visual Foxpro OleDb Provider
The connection string in simplified format
// '@' marks a verbatim string literal, so the backslashes in the path are taken literally.
var connString = @"Provider=VFPOLEDB.1;Data Source=C:\SomePathToData;";
Now, instead of doing the data reader -- just to make sure you can get / see what you are expecting, try using a DataAdapter...
// Fills a DataTable with the complete result of the query.
// NOTE(review): the two-argument OleDbDataAdapter constructor expects the SQL text as a
// string; if yourSqlCmdObject is an OleDbCommand, pass it alone instead - confirm.
var da = new OleDbDataAdapter( yourSqlCmdObject, yourConnection);
var dt = new DataTable();
da.Fill(dt);
It should pull all columns from your query and all rows into proper data column types... Then you could cycle through all the column names, rows, etc..
// Walk the column names, then the rows.
foreach( DataColumn dc in dt.Columns )
{
    // A local declaration needs its own block; it cannot be the unbraced body of a loop.
    var tmp = dc.ColumnName;
}
foreach( DataRow dr in dt.Rows )
{
    object x = dr[0]; // get VALUE from column 0
    x = dr["SpecificColumn"]; // if you KNOW the column name
}
Of which, you could tweak as needed. But if you only need a SPECIFIC column (or limited columns), change your query to quantify that.
Select OneField from YourTable...
I have just written what has to be considered utterly hideous code to count the rows that contain data in the worksheets called "Data" from all the spreadsheets in a given directory. Here's the code
// Relative path of the directory that holds the Excel logs; '@' makes the literal verbatim
// so the backslashes are taken literally.
private const string _ExcelLogDirectoryPath = @"..\..\..\..\Model\ExcelLogs\";
// For every file matching *.xl* in the log directory, opens the workbook through Excel
// interop, counts the leading run of non-empty cells in column 1 of the "Data" worksheet,
// and stores that count minus one in excelRowCounts under a key cut from the workbook name.
static void Main()
{
var excelLogPaths = Directory.GetFiles(_ExcelLogDirectoryPath, "*.xl*");
var excel = new Microsoft.Office.Interop.Excel.Application();
var excelRowCounts = new Dictionary<string, int>();
foreach (var filePath in excelLogPaths)
{
// The workbook path is the executable's directory, a "/" and the path returned by GetFiles.
var spreadsheet = excel.Workbooks.Open(Path.GetDirectoryName(System.Windows.Forms.Application.ExecutablePath) + "/" + filePath);
// Workbooks whose "Data" sheet is not a Worksheet are skipped by the null check below.
var worksheet = spreadsheet.Sheets["Data"] as Worksheet;
if (worksheet != null)
{
// var rowCount = UsedRange.Rows.Count - 1; DOES NOT WORK, THE number is bigger than the 'real' answer
var rowCount = 0;
// Walks column 1 from row 1 downward and stops at the first null or empty cell.
for (var i = 1 ; i < 1000000000; i++)
{
var cell = worksheet.Cells[i, 1].Value2; // "Value2", great name for a property, thanks guys
if (cell != null && cell.ToString() != "") // Very fragile (e.g. skipped rows will break this)
{
rowCount++;
}
else
{
break;
}
}
// Key: the part of the workbook name from its first 'p' up to, not including, its first '.'.
var name = spreadsheet.Name.Substring(spreadsheet.Name.IndexOf('p'), spreadsheet.Name.IndexOf('.') - spreadsheet.Name.IndexOf('p'));
// NOTE(review): the "- 1" presumably leaves out a header row - confirm.
excelRowCounts.Add(name, rowCount - 1);
}
}
I cannot believe this is the right way to do this. It is crazy slow and includes calls to properties with names like Value2 that do not feel like an intended part of a public API. But the method suggested elsewhere dramatically over reports the number of rows (with data in them).
What is the correct way to count the rows with data in them in an Excel worksheet?
========== EDIT 1 ==========
The reason that both UsedRange.Rows.Count and Sid's ACE.OLEDB solution over report the number of rows appears to be a pink background colour that is applied to some of the columns (but only extending to row 7091). Is there a simple/elegant way to count the rows with data in them (i.e. with non-null cell values) regardless of the display colour?
========== EDIT 2 ===========
Sid's ACE.OLEDB solution with the addition he suggests so that the tSQL line reads
// Counts only the rows whose F1 column is not null.
// NOTE(review): F1 is presumably the name given to the first column when HDR=NO - confirm.
var sql = "SELECT COUNT (*) FROM [" + sheetName + "$] WHERE NOT F1 IS NULL";
works. I'll mark that as the answer.
This should do the trick. You can call it with each filename to retrieve the number of rows.
/// <summary>
/// Returns the row count of one worksheet, obtained with a COUNT(*) query over OLE DB.
/// </summary>
/// <param name="filename">Path of an .xlsx or .xls workbook; the extension check ignores case.</param>
/// <param name="sheetName">Worksheet name without the trailing '$'.</param>
/// <returns>The count as text; an empty string if the query yields no row.</returns>
/// <exception cref="ArgumentException">The file name ends in neither .xlsx nor .xls.</exception>
private string GetNumberOfRows(string filename, string sheetName)
{
    string connectionString;
    string count = "";
    if (filename.EndsWith(".xlsx", StringComparison.OrdinalIgnoreCase))
    {
        connectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + filename + ";Mode=ReadWrite;Extended Properties=\"Excel 12.0;HDR=NO\"";
    }
    else if (filename.EndsWith(".xls", StringComparison.OrdinalIgnoreCase))
    {
        connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + filename + ";Mode=ReadWrite;Extended Properties=\"Excel 8.0;HDR=NO;\"";
    }
    else
    {
        // Without this branch connectionString is not definitely assigned and the method
        // does not compile.
        throw new ArgumentException("Unsupported file extension: " + filename, "filename");
    }

    string SQL = "SELECT COUNT (*) FROM [" + sheetName + "$]";
    using (OleDbConnection conn = new OleDbConnection(connectionString))
    using (OleDbCommand cmd = new OleDbCommand(SQL, conn))
    {
        conn.Open();
        using (OleDbDataReader reader = cmd.ExecuteReader())
        {
            if (reader.Read())
            {
                count = reader[0].ToString();
            }
        }
    }
    return count;
}
There might be an even faster way of retrieving just the row count, but I know this works.
If you are using interop, why not use UsedRange?
_Worksheet.UsedRange.Rows.Count