I'm totally new with OleDB and reading excel files. I have a worksheet with 3 columns (Name - Surname - E-mail Address) and I need to:
know the rows number
read all the addresses in the third columns
extract one by one each address
I use an OpenFileDialog object (ofd) and a TextBox (excel) to display the selected file. This is my code:
// Once the user confirms a file in the dialog, show its path in the text box,
// open the workbook through the ACE OLE DB provider and fill `table` from a query.
if (ofd.ShowDialog() == DialogResult.OK)
{
excel.Text = ofd.FileName;
// HDR=NO: the first row is treated as data, not as column headers.
connection = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + excel.Text + ";Extended Properties=\"Excel 12.0 Xml;HDR=NO;IMEX=1\"";
conn.ConnectionString = connection;
conn.Open();
// NOTE(review): the FROM clause is built from the file name (ofd.SafeFileName),
// not from a worksheet name such as [Sheet1$]; this is the query reported as failing.
string name_query = "SELECT A FROM[" + ofd.SafeFileName + "]";
OleDbDataAdapter da = new OleDbDataAdapter(name_query, conn);
da.Fill(table);
conn.Close();
// Number of rows loaded into the table.
j = table.Rows.Count;
}
It doesn't work, a query problem in the "FROM...". I usually read this type of query:
"SELECT * FROM [Sheet1$]"
but I can't find what Sheet1$ exactly is. Someone could explain me the right query?
2) To access to each element of the table (it would contain only the third column) and save it in a string variable what I have to do?
Thanks a lot!
To get the sheetnames, you can use the default getschema functionality from the data providers (connection.getschema).
Without column headers (HDR=NO), the columns are named F1, F2, etc., so for the third field you can query F3. If you want to be completely sure, you can also use GetSchema to get the column names of the sheet/table found with the first GetSchema call.
Finally, to get the values in a string list, you can use a bit of Linq (see the stringlist in the example). Not sure if you meant in a single string value, but if that's the case, you can use a string.join on the linq select.
Combined code, starting from the connection opening:
// Reads the third column (F3) of the first worksheet into `table`, then copies
// the values into a list of strings.
conn.Open();
try
{
    // The "Tables" schema collection lists worksheets but can also contain named
    // ranges and hidden filter tables. Real worksheet names end with "$"
    // (or "$'" when the name is quoted), so pick the first entry of that form
    // instead of blindly taking row 0.
    var tableschema = conn.GetSchema("Tables");
    var firstsheet = tableschema.Rows.Cast<DataRow>()
        .Select(dr => dr["TABLE_NAME"].ToString())
        .First(name => name.EndsWith("$", StringComparison.Ordinal)
                    || name.EndsWith("$'", StringComparison.Ordinal));

    // With HDR=NO the provider names the columns F1, F2, ...; F3 is the third one.
    string name_query = "SELECT F3 FROM [" + firstsheet + "]";
    using (OleDbDataAdapter da = new OleDbDataAdapter(name_query, conn))
    {
        da.Fill(table);
    }
}
finally
{
    // Close the connection even when the schema lookup or the fill throws.
    conn.Close();
}
j = table.Rows.Count;
var stringlist = table.Rows.Cast<DataRow>().Select(dr => dr[0].ToString()).ToList();
Related
I want to read all data from an xls file using OLEDB, but I don't have any experience in that.
// Opens the .xls workbook through the Jet provider, loads [Sheet1$] into a
// DataTable and prints the "email" column of at most the first 40 rows.
// The verbatim-string prefix is '@' ('#' is not a valid C# literal prefix); it is
// required here because the path contains unescaped backslashes.
string filename = @"C:\Users\sasa\Downloads\user-account-creation_2.xls";
string connString = @"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + filename + ";Extended Properties='Excel 8.0;HDR=YES'";
using (System.Data.OleDb.OleDbConnection conn = new System.Data.OleDb.OleDbConnection(connString))
{
    conn.Open();
    System.Data.OleDb.OleDbCommand selectCommand = new System.Data.OleDb.OleDbCommand("select * from [Sheet1$]", conn);
    System.Data.OleDb.OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter(selectCommand);
    DataTable dt = new DataTable();
    adapter.Fill(dt);
    int counter = 0;
    foreach (DataRow row in dt.Rows)
    {
        // HDR=YES: the first worksheet row supplies the column names, so "email"
        // must match a header cell.
        String dataA = row["email"].ToString();
        // String dataB= row["DataB"].ToString();
        Console.WriteLine(dataA + " = ");
        counter++;
        // Stop after 40 rows.
        if (counter >= 40) break;
    }
}
I want to read all data from email row
I get this error
'Sheet$' is not a valid name. Make sure that it does not include invalid characters or punctuation and that it is not too long
Well, you don't have a sheet called Sheet1 do you? Your sheet seems to be called "email address from username" so your query should be....
Select * From ['email address from username$']
Also please don't use Microsoft.Jet.OLEDB.4.0 as it's pretty much obsolete now. Use Microsoft.ACE.OLEDB.12.0. If you specify Excel 12.0 in the extended properties it will open both .xls and .xlsx files.
You can also load the DataTable with a single line...
// Executes the query and loads the resulting reader straight into dt.
// NOTE(review): the OleDbCommand and the reader returned by ExecuteReader() are not disposed here.
dt.Load(new System.Data.OleDb.OleDbCommand("Select * From ['email address from username$']", conn).ExecuteReader());
To read the names of the tables in the file use...
// Print the name of every worksheet in the workbook.
// The schema table also returns Defined Names (ranges created with Excel's
// "Defined Name" feature); actual sheet names end with $ or $'.
DataTable dtTablesList = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
foreach (DataRow schemaRow in dtTablesList.Rows)
{
    string tableName = schemaRow["Table_Name"].ToString();
    bool isWorksheet = tableName.EndsWith("$") || tableName.EndsWith("$'");
    if (!isWorksheet)
    {
        // Not a sheet (e.g. a defined name): nothing to do.
        continue;
    }

    Console.WriteLine(schemaRow["Table_Name"]);
}
Is it possible to use the Open XML SDK?
https://learn.microsoft.com/en-us/office/open-xml/how-to-retrieve-the-values-of-cells-in-a-spreadsheet
I am reading an Excel file that has around a million records. First I query my table (which has no records) to get a DataTable; the query uses aliases so that the column names match the ones defined in my Excel sheet.
// Query ALLTRANSACTIONS with column aliases so the result's column names are the
// display names (Date, Issuer, Branch, ...).
var dal = new clsConn();
var sqlQuery = "SELECT FETAPE_THEIR_TRANDATE \"Date\" ,ISSUER Issuer, ISSU_BRAN Branch , STAN_NUMB STAN, TERMID TermID, ACQUIRER Acquirer,DEBIT_AMOUNT Debit,CREDIT_AMOUNT Credit,CARD_NUMB \"Card Number\" , DESCRIPTION Description FROM ALLTRANSACTIONS";
var returntable = dal.ReadData(sqlQuery);
// NOTE(review): ds is not used anywhere in the code shown.
DataRow ds = returntable.NewRow();
// dtExcelData is assigned from returntable (presumably a DataTable, so both names
// would refer to the same object — confirm against clsConn.ReadData).
var dtExcelData = returntable;
So my datatable looks like this,
Then i read records from excel sheet
// Opens the workbook with the provider matching its extension and fills the
// previously created DataTable (dtExcelData) with columns F1..F10 of [Sheet1$].
// The verbatim-string prefix is '@' ('#' is not valid C#).
OleDbConnection con = null;
if (ext == ".xls")
{
    con = new OleDbConnection(@"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + filepath + ";Extended Properties=Excel 8.0;");
}
else if (ext == ".xlsx")
{
    con = new OleDbConnection(@"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + filepath + ";Extended Properties=\"Excel 12.0;IMEX=2;HDR=NO;TypeGuessRows=0;ImportMixedTypes=Text\"");
}
// NOTE(review): con is still null here when ext is neither ".xls" nor ".xlsx".
con.Open();
dt = con.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
string getExcelSheetName = dt.Rows[0]["Table_Name"].ToString();
//OleDbCommand ExcelCommand = new OleDbCommand(@"SELECT * FROM [" + getExcelSheetName + @"]", con);
// With HDR=NO the provider names the columns F1, F2, ...
OleDbCommand ExcelCommand = new OleDbCommand("SELECT F1, F2, F3, F4, F5,F6,F7,F8,F9,F10 FROM [Sheet1$]", con);
OleDbDataAdapter ExcelAdapter = new OleDbDataAdapter(ExcelCommand);
try
{
    ExcelAdapter.Fill(dtExcelData); //Here I give the datatable which i made previously
}
catch (Exception ex)
{
    // NOTE(review): the exception is swallowed; the reporting lines below are commented out.
    //lblAlert2.CssClass = "message-error";
    //lblAlert2.Text = ex.Message;
}
It reads successfully and fills the data into the DataTable, but it creates its own columns in the DataTable (F1 to F10). How can I move this data so that it lines up exactly with the columns I defined in the DataTable?
How Will i manage this to not create other columns (f1,f2..f10)
any workaround will be appreciable or Please explain what i am doing wrong and how can i achieve this.
UPDATE :
My Excel file looks like this
The Microsoft.ACE.OLEDB.12.0 driver will handle both types of excel spreadsheets and using the same Extended Properties. i.e. "Excel 12.0" will open both .xls and .xlsx.
Leave the HDR=NO as OLEDB expects them in the first row and they are actually in row 11.
Sadly "TypeGuessRows=0;ImportMixedTypes=Text" is completely ignored by Microsoft.ACE.OLEDB.12.0; you've got to play around with the registry (yuk). Change your IMEX=2 to IMEX=1 to ensure that mixed data types are handled as text.
Change back to using "Select * From [Sheet1$]" and then I'm afraid that you are going to have to handle the source data manually.
// Loads the first schema table of the workbook into xlWorksheet, then copies
// the first 10 columns of every row after the 11 header rows into returntable.
DataTable xlWorksheet = new DataTable();

// The verbatim-string prefix is '@' ('#' is not valid C#). The connection,
// command and reader are disposed as soon as the sheet is in memory.
using (OleDbConnection con = new OleDbConnection(@"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + filepath + ";Extended Properties=\"Excel 12.0;IMEX=1;HDR=NO\""))
{
    con.Open();
    DataTable dt = con.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
    string getExcelSheetName = (string)dt.Rows[0]["Table_Name"];
    using (OleDbCommand selectAll = new OleDbCommand("Select * From [" + getExcelSheetName + "]", con))
    using (OleDbDataReader reader = selectAll.ExecuteReader())
    {
        xlWorksheet.Load(reader);
    }
}

//More than 11 rows implies 11 header rows and at least 1 data row
// (&& instead of &: the column check is skipped when the row check already fails)
if (xlWorksheet.Rows.Count > 11 && xlWorksheet.Columns.Count >= 10)
{
    // HDR=NO keeps the header rows as data rows 0..10, so real data starts at index 11.
    for (int nRow = 11; nRow < xlWorksheet.Rows.Count; nRow++)
    {
        DataRow returnRow = returntable.NewRow();
        for (int nColumn = 0; nColumn < 10; nColumn++)
        {
            //Note you will probably get conversion problems here that you will have to handle
            returnRow[nColumn] = xlWorksheet.Rows[nRow].ItemArray[nColumn];
        }
        returntable.Rows.Add(returnRow);
    }
}
I'm guessing you simply want to add the excel data into your ALLTRANSACTION table? You don't specify but it seems the likely outcome of this. If so this is a terrible way to do it. You don't need to read the whole table into memory append data and then update the database. All you need to do is read the excel file and insert the data to the Oracle table.
Some thoughts, your returntable will contain data so if you just want the structure of the table then add a "Where RowNum=0" to the Select statement. To add the data to your Oracle Database you could 1) Convert to using the Oracle Data Provider (ODP) and then use using OracleBulkCopy Class or 2) simply modify the above to insert row by row as you read the data. As long as you don't have a LOT of data in your Excel spreadsheet it will work just fine. Having said that a Million rows is a LOT so perhaps not the best option. You will need to validate the input as Excel is not the best data source really.
I am reading data from an Excel file. when I read the normal Excel file,It works fine but when I read an excel file which has columns like shown below it does not find the work sheet and gives an exception-
The Microsoft Jet database engine could not find the object 'Sheet1$_'. Make sure the object exists and that you spell its name and the path name correctly.
My Code to read the excel is-
/// <summary>
/// Opens the Excel file at <paramref name="ExcelPath"/> through OLE DB, takes the
/// TABLE_NAME of the first row of the Columns schema as the sheet to read, and
/// returns the rows of that sheet whose [Event Code] is not an empty string.
/// </summary>
/// <param name="ExcelPath">Path of the workbook; the text after the last '.' selects the provider.</param>
/// <returns>The first table of the DataSet filled by the query.</returns>
private static DataTable getExcelData(string ExcelPath)
{
OleDbConnection con;
string connectionString;
// The extension is whatever follows the last '.' in the path (compared case-sensitively below).
string[] pathArray = ExcelPath.Split('.');
var Extention = pathArray[pathArray.Length - 1];
if (Extention == "xlsx")
//read a 2007 file
connectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" +
ExcelPath + ";Extended Properties=\"Excel 8.0;HDR=YES;\"";
else
//read a 97-2003 file
connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" +
ExcelPath + ";Extended Properties=Excel 8.0;";
con = new OleDbConnection(connectionString);
if (con.State == ConnectionState.Closed)
{
con.Open();
}
// NOTE(review): this reads the Columns schema and uses the TABLE_NAME of its first
// row as the sheet name. Nothing here checks that the entry is a visible worksheet;
// the reported error names 'Sheet1$_', so the first entry is presumably not the
// intended sheet — confirm.
DataTable dbSchema = con.GetOleDbSchemaTable(OleDbSchemaGuid.Columns, null);
var firstSheetName = dbSchema.Rows[0]["TABLE_NAME"];
OleDbDataAdapter cmd = new OleDbDataAdapter("select * from [" + firstSheetName + "] Where NOT [Event Code]=''", con);
DataSet ds = new DataSet();
cmd.Fill(ds);
// NOTE(review): there is no try/finally, so Close() is not reached when a call above throws.
con.Close();
return ds.Tables[0];
}
}
I have to get all the columns inside Mon,Tues etc.
GetOleDbSchemaTable also returns hidden tables in your Excel file: usually a name like Sheet1$_ indicates a hidden table created when you apply a filter on Sheet1$.
You need to change your code: search for table that ends with $ to set firstSheetName.
Please note that OLEDB does not preserve the sheet order as they were in Excel.
Also note that you need to do this to read an excel file with multirow titles:
set HDR=No in EXTENDED PROPERTIES of your connection string
specify column name and select range in your OleDbCommand in order to skip the first two rows
For example:
SELECT [F1] AS Location,
[F2] AS EmpId,
[F3] AS EmpName,
[F4] AS MondayShift,
[F5] AS Monday15Min,
[F6] AS Monday30Min,
[F7] AS Monday15Min2
FROM [Sheet1$A3:G]
I have a piece of code where I want to extract values from the A column of an Excel sheet. Right now, here is the code I'm using and having an issue with:
// Reads the first column of the workbook's first schema table into m_list.
// The connection string is left exactly as posted, since it is the subject of the question.
m_connString = "Provider = Microsoft.ACE.OLEDB.12.0; Data Source = " + m_source + "; Extended properties = 'Excel 12.0; HDR = NO; IMEX = 1;';";
using (OleDbConnection conn = new OleDbConnection(m_connString))
{
    conn.Open();
    // GetOleDbSchemaTable(OleDbSchemaGuid.Tables, ...) is the schema call; the
    // posted "GetOleDbSchemaGuid(Tables, null)" does not exist.
    DataTable dt = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
    DataSet ds = new DataSet();
    // The posted code read an undefined "ExcelSheets"; the schema table retrieved
    // just above (dt) is the only candidate in this snippet.
    string defaultSheet = dt.Rows[0]["TABLE_NAME"].ToString();
    OleDbCommand comm = new OleDbCommand("SELECT * FROM [" + defaultSheet + "]", conn);
    OleDbDataAdapter adapter = new OleDbDataAdapter(comm);
    // Bug appears here
    adapter.Fill(ds);
    // Fill a List<string> with the data found
    for (int r = 0; r < ds.Tables[0].Rows.Count; r++)
    {
        m_list.Add(ds.Tables[0].Rows[r][0].ToString());
    }
}
What is happening is that, if I have an Excel file with the following content in the A column:
Row1
Row2
Row3
...
RowX
...What I end up getting is all values except for the first value (Row1). It turns out that Row1 is being used as a column name(?) for the DataSet's table. However, I don't want there to be any column names or headers, and I specifically state this in the connection string.
How can I prevent this behavior so I can have all my data placed in the List? Or, failing that, how can I work around this issue and extract Row1 from that DataSet?
Check Microsoft Reference to understand how Extended Properties of the connection HDR=NO works:
Column headings: By default, it is assumed that the first row of your
Excel data source contains columns headings that can be used as field
names. If this is not the case, you must turn this setting off, or
your first row of data "disappears" to be used as field names. This is
done by adding the optional HDR= setting to the Extended Properties of
the connection string. The default, which does not need to be
specified, is HDR=Yes. If you do not have column headings, you need to
specify HDR=No; the provider names your fields F1, F2, etc.
Here is example:
Excel File Data (test.xlsx):
Code:
// Example: with HDR=NO the provider names the columns F1, F2, F3, ... and the
// first worksheet row is returned as data.
string m_source = "test.xlsx";
// The verbatim-string prefix is '@' ('#' is not valid C#); it is what allows the
// literal to span several lines. The continuation lines stay flush-left so the
// connection string text is unchanged.
string m_connString = @"Provider = Microsoft.ACE.OLEDB.12.0;
Data Source = " + m_source + @";
Extended properties = 'Excel 12.0;
HDR= NO;
IMEX = 1;';";
using (OleDbConnection conn = new OleDbConnection(m_connString))
{
    conn.Open();
    string squery = "SELECT f1, f2, f3 FROM [Sheet1$]";
    OleDbCommand comm = new OleDbCommand(squery, conn);
    OleDbDataAdapter adapter = new OleDbDataAdapter(comm);
    DataSet ds = new DataSet();
    adapter.Fill(ds);
}
DataSet Visualizer:
Try this way:
// Use the values of the first data row as column names: a column is renamed when
// its first-row value is non-empty and no column with that name exists yet.
DataTable table = ds.Tables[0];
foreach (DataColumn col in table.Columns)
{
    string header = table.Rows[0][col.ColumnName].ToString();
    bool canRename = header != "" && !table.Columns.Contains(header);
    if (!canRename)
    {
        continue;
    }

    col.ColumnName = header;
}
change the sql to something along the lines of this
"SELECT * FROM [" + defaultSheet + "] Except Select Top(1)"
I've been at this for few hours now and can't seem to find a solution. I have 2 inventory lists, one a spreadsheet and the other a data table. I need to match the spreadsheet against the data table to find out if I have missing inventory. The spreadsheet should match with what I have in the db, ie the spreadsheet is like a master so when I have missing inventory in DB I need to add it an list and build a report.
I thought by looping throught the spreadsheet and for each inventory in the spreadsheet loop through the data table I can achieve my goal but that proved to be wrong. Any ideas how I would do this?
Thanks,
Eric
Here is the method:
// Loads the first sheet of XLS/reconcile.xls into a DataSet, reads asset rows from
// the stored procedure sp_Assets_Hardware_Select_by_Serial_Number into SN_list,
// compares the second column of each spreadsheet row with the serial number of the
// first SN_list entry, shows an alert listing the values that did not match, and
// configures the report viewer with the matched values.
public void Reconcile()
{
ObjectDataSource ods = new ObjectDataSource();
ods.ID = "ods";
// TypeName is assigned twice; the second assignment below is the one that remains.
ods.TypeName = "";
ods.SelectMethod = "GetAssets";
ods.TypeName = "dsAssetsTableAdapters.AssetsTableAdapter";
ods.SelectParameters.Clear();
ReportDataSource rds = new ReportDataSource("dsAssets_Assets", ods);
reportViewer1.LocalReport.DataSources.Clear();
reportViewer1.LocalReport.DataSources.Add(rds);
// _list collects matched values separated by '|'; _list_missing_SN collects
// unmatched values separated by "<br>".
string _list = "";
string _list_missing_SN = "";
string filename = Server.MapPath("XLS/reconcile.xls");
string sheetname = GetExcelSheetNames(filename)[0].ToString();
String sConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;" +
"Data Source=" + filename + ";" +
"Extended Properties=Excel 8.0;";
// NOTE(review): objConn is opened here and not closed anywhere in this method.
OleDbConnection objConn = new OleDbConnection(sConnectionString);
objConn.Open();
OleDbCommand objCmdSelect = new OleDbCommand("SELECT * FROM [" + sheetname + "]", objConn);
OleDbDataAdapter objAdapter1 = new OleDbDataAdapter();
objAdapter1.SelectCommand = objCmdSelect;
DataSet objDataset1 = new DataSet();
objAdapter1.Fill(objDataset1, "XLData");
string m_AssetManagement = System.Configuration.ConfigurationManager.ConnectionStrings["Asset_Management"].ToString();
List<string> SN_list = new List<string>();
SqlDataReader Assets_rd;
SqlCommand cmdMyAssets = new SqlCommand();
cmdMyAssets.Connection = new SqlConnection(m_AssetManagement);
cmdMyAssets.CommandType = CommandType.StoredProcedure;
cmdMyAssets.CommandText = "sp_Assets_Hardware_Select_by_Serial_Number";
try
{
// NOTE(review): neither the SQL connection nor the reader is closed in this method.
cmdMyAssets.Connection.Open();
Assets_rd = cmdMyAssets.ExecuteReader();
string strString;
while (Assets_rd.Read())
{
// Each entry is "<column 0 as string>^<column 1 as int>^<column 2 as int>".
strString = Assets_rd.GetSqlString(0).ToString().Trim() + "^" + Assets_rd.GetInt32(1).ToString().Trim() + "^" + Assets_rd.GetInt32(2).ToString().Trim();
SN_list.Add(strString);
}
}
catch (SqlException dbError)
{
Trace.Write("Database unavailable with Message: ", dbError.Message);
Trace.Write("Stack Trace: ", dbError.StackTrace);
throw;
}
// NOTE(review): record_match is never read in this method.
bool record_match = false;
foreach (DataRow drXCL in objDataset1.Tables[0].Rows)
{
// Rows whose second column is empty are skipped.
if (drXCL.ItemArray[1].ToString() != string.Empty)
{
try
{
string[] assetInfo = null;
// NOTE(review): only SN_list[0] is examined for every spreadsheet row; the other
// entries of SN_list are never compared. If SN_list is empty the indexer throws and
// the catch below sends a notification for each such row.
assetInfo = SN_list[0].Split('^');
// Substring test: the first field of that entry must contain the spreadsheet value.
if (assetInfo[0].Contains(drXCL.ItemArray[1].ToString()))
{
_list += "|" + drXCL.ItemArray[1].ToString();
}
else
{
_list_missing_SN += drXCL.ItemArray[1].ToString().Trim() + "<br>";
}
}
catch (Exception SqlEx)
{
// Throw Sqw Exception
// The exception is not rethrown; only its message is passed to the notification helper.
clAppExceptions.buildEmailNotification(SqlEx.Message.ToString());
}
}
else
{
//_list += "|*** NO SERIAL NUMBER ***";
}
}
if (_list_missing_SN != "")
{
// NOTE(review): _list_missing_SN is concatenated unescaped into the script text and contains "<br>".
Page.ClientScript.RegisterClientScriptBlock(this.Page.GetType(), "myAlert", "<script language='javascript'>alert('Following Serial Numbers were not on the spreasheet: " + _list_missing_SN + "');</script>");
}
// Terminate the '|'-separated list before passing it as the "list" select parameter.
_list += "|";
ods.SelectMethod = "GetAssetsBySerialNumbers";
ods.SelectParameters.Add("list", _list);
reportViewer1.LocalReport.ReportPath = Server.MapPath("~/Reports/Asset_List.rdlc");
ReportParameter rpCategory = new ReportParameter("ReportParameter", "These assets are gone.");
ReportParameter[] _rpCategory = { rpCategory };
reportViewer1.LocalReport.SetParameters(_rpCategory);
reportViewer1.LocalReport.Refresh();
}
I would load the master list into an array and create a second array of bools that correspond to the positions of the first array. Then looping through the datatable, when you find the element, flip the bool to true. If you cannot find it, store that element in a not found array. Once the datatable loop is finished, you can produce 2 lists. The first list is the items in the datatable but not in the master list... the not found array. The second list is created by looping through the bool array, any value of false means that the master list element was not found in the datatable.
This can then be expanded to include counts or other pieces of information that should match but do not.
I'd suggest a different approach. You could copy the data from the database and put it in the spreadsheet on a different worksheet and use the match function. You could also take the data from the spreadsheet and put it in a new table. Then use a query to find discrepancies. I don't think a programming solution is required unless this isn't a one-time thing. If this is required for an application of some sort, ignore my answer:)
Don't know if this is of any use, but if you have the two lists in IEnumerable sequences you could do something simple with LINQ.
I have an extension method I wrote for IEnumerable that I use for this purpose:
/// <summary>
/// Returns the elements of <paramref name="inputSequence"/> that do not occur in
/// <paramref name="secondSequence"/>, in their original order. Duplicates in the
/// input are kept. Elements are compared with the default equality comparer.
/// </summary>
/// <param name="inputSequence">Sequence to filter.</param>
/// <param name="secondSequence">Elements to exclude; when null, a copy of the whole input is returned.</param>
/// <returns>A list copy of the input when <paramref name="secondSequence"/> is null, otherwise a lazily evaluated filtered sequence.</returns>
/// <exception cref="ArgumentNullException"><paramref name="inputSequence"/> is null.</exception>
public static IEnumerable<T> NotIn<T>(this IEnumerable<T> inputSequence, IEnumerable<T> secondSequence)
{
    if (inputSequence == null)
    {
        throw new ArgumentNullException(nameof(inputSequence));
    }

    if (secondSequence == null)
    {
        return new List<T>(inputSequence);
    }

    return NotInIterator(inputSequence, secondSequence);
}

// Lazily yields the input elements that are absent from the exclusion sequence.
private static IEnumerable<T> NotInIterator<T>(IEnumerable<T> inputSequence, IEnumerable<T> secondSequence)
{
    // Enumerate the exclusion sequence once into a set: membership tests become
    // O(1) instead of re-scanning secondSequence for every input element.
    var excluded = new HashSet<T>(secondSequence);
    foreach (T element in inputSequence)
    {
        if (!excluded.Contains(element))
        {
            yield return element;
        }
    }
}
If I recall correctly I ended up finding a native LINQ function that accomplished the same thing but I, of course, forgot what it was
If you're just looking for a quick solution, I would just do everything in Excel. It's easy to link Excel to a DB and to link lists.
Link your DB to your Excel file (this way it's always linked to the DB)
Insert a formula to check if the (part, key, etc...) in your master list exists in your list from the DB.
Use this link to see how to link lists in Excel.
Ultimately you have many options. To make a sound decision you need to answer a few questions.
How often will this task need to be performed?
What level of resources do you have available to utilize?
How quickly does this task need to run?
How much data needs to be compared?
Once you have answered these questions, we can suggest a solid solution to you more accurately.
Keep it simple... ADO.Net will probably be the simplest approach for this problem. If you fill a DataTable with the values from the spreadsheet (hopefully using OleDb) you will be able to also pull information from the Database (using either OleDb or the correct ADO.Net client.) You can then update the values back into the database for fields such as location or last seen time. These Fill and Update commands can be queries or stored procs.
If you provide more detail such as table schema I could expand my answer further.
Edit...
If you already have one of the sources in a DataTable in .Net you could put both of them in the same DataSet and write a DataView query that would do an outer join. The Outer Join would allow you to see the matched and unmatched values.
Updated...
Sorry it took so long to get back to this. (Started a new job so I have been rather busy.) I am using two spreadsheets, but there is no reason that you couldn't use the same concept between different databases and even different ADO.Net providers. The basic idea behind this example is to create a LastSeen timestamp in your database. Then instead of looking for what isn't there, you post the latest inventory back to the database and then query for what hasn't been updated.
// Marks every item found in the inventory workbook as "seen now" in the database
// workbook, then lists the database rows whose LastSeen was not advanced, i.e.
// the items missing from the inventory.
var inventoryFile = "Inventory.xlsx"; //ID,Item
var databaseFile = "Database.xlsx"; //ID,Item,Type,SN,LastSeen
var connectionFormatter = "Provider=Microsoft.ACE.OLEDB.12.0;" +
                          "Data Source=\"{0}\";Mode=ReadWrite;" +
                          "Extended Properties=\"Excel 12.0 Xml;HDR=Yes;\";";
var inventoryConnectionString = string.Format(connectionFormatter,
                                              inventoryFile);
var databaseConnectionString = string.Format(connectionFormatter,
                                             databaseFile);
using (var inventoryConnection =
           new OleDbConnection(inventoryConnectionString))
using (var databaseConnection =
           new OleDbConnection(databaseConnectionString))
{
    if (inventoryConnection.State != ConnectionState.Open)
    {
        inventoryConnection.Open();
    }
    if (databaseConnection.State != ConnectionState.Open)
    {
        databaseConnection.Open();
    }

    // Remember the newest LastSeen value before the update; anything still at or
    // below it afterwards was not touched by this run.
    var lastSeenCmdString = "SELECT MAX(LastSeen) FROM [Sheet1$]";
    var lastSeenCommand = new OleDbCommand(lastSeenCmdString,
                                           databaseConnection);
    var lastSeen = lastSeenCommand.ExecuteScalar();

    var inventorySelectCmdString = "SELECT ID, Item FROM [Sheet1$]";
    var inventoryCmd = new OleDbCommand(inventorySelectCmdString,
                                        inventoryConnection);
    var table = new DataTable();
    table.Columns.Add("ID", typeof(int));
    table.Columns.Add("Item", typeof(int));
    var inventoryDataAdapter = new OleDbDataAdapter(inventoryCmd);
    var databaseDataAdapter = new OleDbDataAdapter();

    // The UPDATE parameter is bound to the "Item" source column of each table row.
    var updateLastSeenCmdString =
        "UPDATE [Sheet1$] SET LastSeen=NOW() WHERE Item=?";
    var updateCmd = new OleDbCommand(updateLastSeenCmdString,
                                     databaseConnection);
    updateCmd.Parameters.Add("Item",
                             OleDbType.Integer,
                             0,
                             "Item");
    databaseDataAdapter.UpdateCommand = updateCmd;

    inventoryDataAdapter.Fill(table);
    table.AcceptChanges();
    // Flag every inventory row as modified so that Update() issues the UPDATE
    // command once per row.
    foreach (var row in table.Rows.OfType<DataRow>())
    {
        row.SetModified();
    }
    databaseDataAdapter.Update(table);

    // Note the space before WHERE: the fragments are concatenated, and without it
    // the statement would read "[Sheet1$]WHERE".
    var notSeenCmdString = "SELECT ID,Item,Type,SN,LastSeen " +
                           "FROM [Sheet1$] " +
                           "WHERE LastSeen <= ?";
    var notSeenCmd = new OleDbCommand(notSeenCmdString,
                                      databaseConnection);
    notSeenCmd.Parameters.Add("LastSeen", OleDbType.Date).Value = lastSeen;
    databaseDataAdapter.SelectCommand = notSeenCmd;
    var missingInventory = new DataTable();
    databaseDataAdapter.Fill(missingInventory);
    foreach (var row in missingInventory.Rows.OfType<DataRow>())
    {
        Console.WriteLine("ID: {0} Item:{1} Type:{2} SN:{3} LastSeen:{4}",
                          row.ItemArray);
    }
}