Efficient function for reading a delimited file into DataTable - c#

I was wondering if anyone knew of an efficient c# function for reading a tab delimited file into a datatable?
Thanks

This currently uses the LINQ methods .First() and .Skip(); both are easy to recreate if you need to use this on .NET 2.0.
//even cooler as an extension method
/// <summary>
/// Lazily yields the lines of a text file, one at a time.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>A sequence that reads the file as it is enumerated.</returns>
static IEnumerable<string> ReadAsLines(string filename)
{
    using (var input = new StreamReader(filename))
    {
        string current;
        // ReadLine returns null once the end of the stream has been reached.
        while ((current = input.ReadLine()) != null)
        {
            yield return current;
        }
    }
}
/// <summary>
/// Loads the tab-delimited file "tabfile.txt" into a DataTable, taking the
/// column names from the first line and one row from every following line.
/// </summary>
static void Main()
{
    var filename = "tabfile.txt";
    var data = new DataTable();
    // Walk the lazy sequence exactly once. Calling First() and then Skip(1)
    // on it would run the iterator twice and so open and read the file twice.
    using (var lines = ReadAsLines(filename).GetEnumerator())
    {
        // An empty file has no header line; leave the table empty instead of throwing.
        if (!lines.MoveNext())
        {
            return;
        }
        //this assumes the first record is filled with the column names
        foreach (var header in lines.Current.Split('\t'))
        {
            data.Columns.Add(header);
        }
        while (lines.MoveNext())
        {
            data.Rows.Add(lines.Current.Split('\t'));
        }
    }
}

/// <summary>
/// Reads a tab-delimited text file into a DataTable through the Jet OLE DB text driver.
/// The connection string declares HDR=YES, i.e. the first line holds the column headers.
/// </summary>
/// <param name="strFileName">Path of the file; its directory becomes the data source and its file name the table that is queried.</param>
/// <returns>The first table produced by filling the DataSet.</returns>
public System.Data.DataTable GetDataTable(string strFileName)
{
    string connectionString = "Provider=Microsoft.Jet.OleDb.4.0; Data Source = " + System.IO.Path.GetDirectoryName(strFileName) + ";Extended Properties = \"Text;HDR=YES;FMT=TabDelimited\"";
    string strQuery = "SELECT * FROM [" + System.IO.Path.GetFileName(strFileName) + "]";
    System.Data.DataSet ds = new System.Data.DataSet("CSV File");
    // The using blocks release the connection and the adapter even when Open or
    // Fill throws; previously an exception skipped the explicit conn.Close().
    using (System.Data.OleDb.OleDbConnection conn = new System.Data.OleDb.OleDbConnection(connectionString))
    using (System.Data.OleDb.OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter(strQuery, conn))
    {
        conn.Open();
        adapter.Fill(ds);
    }
    return ds.Tables[0];
}

Here's one way to do it...
// Build a table with three string columns and add one row per line of the file.
var dt = new DataTable();
dt.Columns.Add(new DataColumn("Column1", typeof(string)));
dt.Columns.Add(new DataColumn("Column2", typeof(string)));
dt.Columns.Add(new DataColumn("Column3", typeof(string)));
// Verbatim string literal (@"...") so the backslash in the path is not read as an escape.
var lines = File.ReadAllLines(@"c:\tabfile.txt");
foreach (string line in lines)
{
    // Each tab-separated field fills the next column of the new row.
    dt.Rows.Add(line.Split('\t'));
}

/// <summary>
/// Loads a delimited text file into a DataTable whose columns are named
/// "Column1" .. "ColumnN".
/// </summary>
/// <param name="filePath">Path of the text file to read.</param>
/// <param name="numberOfColumns">Number of columns to create; extra fields on a line are ignored.</param>
/// <returns>A table holding one row per line of the file.</returns>
DataTable ConvertToDataTable(string filePath, int numberOfColumns)
{
    DataTable tbl = new DataTable();
    for (int col = 0; col < numberOfColumns; col++)
    {
        tbl.Columns.Add(new DataColumn("Column" + (col + 1).ToString()));
    }
    foreach (string line in System.IO.File.ReadAllLines(filePath))
    {
        // NOTE(review): fields are split on a single space; for tab-delimited
        // input this presumably should be '\t' - confirm against the data.
        string[] cols = line.Split(' ');
        DataRow dr = tbl.NewRow();
        // Copy only the fields that both the table and the line actually have.
        // The bound used to be a hard-coded 3, which ignored numberOfColumns and
        // overran either the row or the field array for any other width.
        int fieldCount = System.Math.Min(numberOfColumns, cols.Length);
        for (int cIndex = 0; cIndex < fieldCount; cIndex++)
        {
            dr[cIndex] = cols[cIndex];
        }
        tbl.Rows.Add(dr);
    }
    return tbl;
}

Related

Creating DataTable from Text File and splitting

This is going to be my text file (30 lines)
OrderNo:37374
OrderQuantity:250
BarcodeQR:584,25478Klkd28
NormalBarcode:1565484864
.......
.......
.......
This is the code :
/// <summary>
/// Builds a single-column table ("test") containing one row for every
/// ';'-separated segment of every line in the input file.
/// </summary>
/// <returns>The populated table.</returns>
public DataTable DTforReport()
{
    var table = new DataTable();
    var testColumn = new DataColumn("test") { DataType = System.Type.GetType("System.String") };
    table.Columns.Add(testColumn);
    string[] fileLines = File.ReadAllLines("C:\\Users\\abc\\Desktop\\abc.txt");
    for (int lineIndex = 0; lineIndex < fileLines.Length; lineIndex++)
    {
        // Every segment of the line becomes its own row in the single column.
        foreach (string piece in fileLines[lineIndex].Split(';'))
        {
            DataRow row = table.NewRow();
            row[0] = piece;
            table.Rows.Add(row);
        }
    }
    return table;
}
I want my output like this
OrderNo OrderQuantity BarcodeQR
37374 250 584,25478Klkd28
How can I change my code to achieve this?
You have generated only one column. Change your code like below to see your desired result:
/// <summary>
/// Turns a file of "Name:Value" lines into a table with one column per line
/// and a single row holding the values.
/// </summary>
/// <returns>A table with one string column per "Name:Value" line and exactly one row.</returns>
public DataTable DTforReport()
{
    DataTable dt = new DataTable();
    string[] lines = File.ReadAllLines("C:\\Users\\abc\\Desktop\\abc.txt");
    var values = new System.Collections.Generic.List<string>(lines.Length);
    foreach (string line in lines)
    {
        // Split once, at the first ':' only, so a value that itself contains
        // ':' is kept whole instead of being cut at its first colon.
        string[] parts = line.Split(new[] { ':' }, 2);
        // Lines without a "Name:Value" separator (blank lines, filler) carry no column.
        if (parts.Length < 2)
        {
            continue;
        }
        DataColumn col = new DataColumn(parts[0]);
        col.DataType = Type.GetType("System.String");
        dt.Columns.Add(col);
        values.Add(parts[1]);
    }
    // The row is added only after every column exists, one value per column.
    dt.Rows.Add(values.ToArray());
    return dt;
}
I suggest you to modify your method like the following:
/// <summary>
/// Reads the input file into a table with the fixed columns OrderNo,
/// OrderQuantity and BarcodeQR. Each line is split on ';' into "Name:Value"
/// segments, and the value of segment i goes into column i.
/// </summary>
/// <returns>The populated table, one row per line.</returns>
public DataTable DTforReport()
{
    var report = new DataTable("Test");
    foreach (string columnName in new[] { "OrderNo", "OrderQuantity", "BarcodeQR" })
    {
        report.Columns.Add(columnName);
    }
    foreach (string record in File.ReadAllLines("C:\\Users\\abc\\Desktop\\abc.txt"))
    {
        DataRow row = report.NewRow();
        int columnIndex = 0;
        foreach (string pair in record.Split(';'))
        {
            // Element 1 of the ':' split is the text after the first colon.
            string[] nameAndValue = pair.Split(':');
            row[columnIndex] = nameAndValue[1];
            columnIndex++;
        }
        report.Rows.Add(row);
    }
    return report;
}
Few suggestions for improvement:
I have used static column names; if you need more columns, or the names may change in the future, you can create the columns in the DataTable dynamically instead.
If you have any doubts about the input values, validate them properly.
Validation here means checking the split values before accessing them by index; otherwise the code may end up throwing an IndexOutOfRangeException.
// Derive the columns from the first line, then load every line (the first
// one included) as a row.
var dt = new DataTable();
string[] lines = File.ReadAllLines("C:\\Users\\abc\\Desktop\\abc.txt");
string[] headerCells = lines.First().Split(';');
for (int c = 0; c < headerCells.Length; c++)
{
    // A "name:value" cell supplies the column name; any other cell gets a positional name.
    string name;
    if (headerCells[c].Contains(":"))
    {
        name = headerCells[c].Split(':')[0];
    }
    else
    {
        name = "Column" + c;
    }
    var column = new DataColumn(name) { DataType = System.Type.GetType("System.String") };
    dt.Columns.Add(column);
}
foreach (string line in lines)
{
    DataRow row = dt.NewRow();
    string[] cells = line.Split(';');
    for (int c = 0; c < cells.Length; c++)
    {
        // The value is the text after the first ':'; cells without one are stored as "".
        if (cells[c].Contains(":"))
        {
            row[c] = cells[c].Split(':')[1];
        }
        else
        {
            row[c] = "";
        }
    }
    dt.Rows.Add(row);
}
*The number of columns must be the same in every row.

Extract data from datatable

I extract data from all sheets in a workbook using the following code :
// For every sheet name that contains no underscore, load the whole sheet into
// a new DataTable and add that table to the outer DataSet `ds`.
foreach (var sheetName in GetExcelSheetNames(connectionString))
{
if (sheetName.Contains("_"))
{
// Names containing '_' are skipped on purpose (empty branch).
}
else
{
using (OleDbConnection con = new OleDbConnection(connectionString))
{
var dataTable = new DataTable();
// The sheet name is substituted twice: as an extra select-list expression
// aliased "sheetName", and as the table to read from.
// NOTE(review): the first {0} is not quoted, so it is presumably not treated
// as a string literal by the provider - confirm.
string query = string.Format("SELECT * ,{0} as sheetName FROM [{0}]", sheetName);
con.Open();
OleDbDataAdapter adapter = new OleDbDataAdapter(query, con);
try
{
adapter.Fill(dataTable);
ds.Tables.Add(dataTable);
}
// NOTE(review): any exception from Fill or Tables.Add is silently discarded
// here, so a failing sheet simply does not appear in ds.
catch { }
}
}
I can't figure out how the data is stored in the DataTable: is the sheet name added as a column? How can I extract it?
// Visit every cell of every table in ds, with one open SqlConnection per table.
foreach (DataTable dt in ds.Tables)
{
using (SqlConnection con = new SqlConnection(consString))
{
con.Open();
// i walks the rows and j the columns of the current table.
for (int i = 0; i < dt.Rows.Count; i++)
{
for (int j = 0; j < dt.Columns.Count; j ++)
{
//what should I write here ?
}
}
}
In order to get the sheet name, using oledb, you will need to use code that looks something like this (thanks to this SO post and answer):
// Collect the TABLE_NAME of every schema entry that contains a '$'.
DataTable dtSheets = con.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
List<string> sheets = new List<string>();
foreach (DataRow schemaRow in dtSheets.Rows)
{
    string tableName = schemaRow["TABLE_NAME"].ToString();
    // Entries without '$' are skipped. A row holds either something like
    // '_xlnm#_FilterDatabase' or a sheet name, and sheet names always end with a $ sign.
    if (!tableName.Contains("$"))
    {
        continue;
    }
    sheets.Add(tableName);
}
Below is how you access the values from a datatable:
var someValue = dt.Rows[i][j]
You need to get the item at the column index (j) of the row, at the row index (i), of the current datatable (dt).
Conversely, you can use the name of the column as well.
var someValue = dt.Rows[i]["columnName"]
Assuming dt is your DataTable variable,
use dt.Rows[row index][column index],
for example:
dt.Rows[2][4] -> references the cell at row index 2, column index 4 (both indexes are zero-based, so this is the 3rd row and 5th cell)
I am not sure, but perhaps the sheetname might be stored at dt.TableName

Excel Sheets into multiple C# datatables

I am trying to import an Excel spreadsheet into an array of DataTables. Each table should hold one sheet from the spreadsheet. Right now I am seeing that each table contains the information from all sheets. I think this part is not working correctly:
dataSet.Clear();
Let me know if you can see what I am doing wrong.
Here is the rest of the code.
/// <summary>
/// Reads every table listed in the workbook's OLE DB schema into its own DataTable.
/// </summary>
/// <returns>
/// One DataTable per schema entry, in schema order, or null when the provider
/// returns no schema table.
/// </returns>
public DataTable[] ReadDoc()
{
    // Verbatim string literal (@"...") so the backslashes in the path are taken literally.
    string filename = @"C:\Documents and Settings\user\Desktop\Test.xlsx";
    string connectionString = String.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 8.0;HDR=YES\";", filename);
    // The using block closes the connection on every exit path, including the early return.
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    {
        connection.Open();
        DataTable schema = connection.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        if (schema == null)
        {
            return null;
        }
        // Sized from the schema instead of a fixed 3, so workbooks with more
        // sheets no longer overrun the array.
        DataTable[] table = new DataTable[schema.Rows.Count];
        for (int j = 0; j < table.Length; j++)
        {
            string sheetName = schema.Rows[j]["TABLE_NAME"].ToString();
            string query = "SELECT * FROM [" + sheetName + "]";
            // A fresh DataTable per sheet. Filling one shared DataSet made every
            // array slot reference the same Tables[0] instance.
            DataTable sheet = new DataTable(sheetName);
            using (OleDbDataAdapter dataAdapter = new OleDbDataAdapter(query, connection))
            {
                dataAdapter.Fill(sheet);
            }
            table[j] = sheet;
        }
        return table;
    }
}
Thanks for the help.
The problem here is that your dataSet is declared outside the for loop, so each item of the DataTable array ends up with the same information (dataSet.Tables[0]). You must declare it inside the loop so that each iteration stores different information.
// One pass per sheet name in excelSheets.
for (int j = 0; j < excelSheets.Length; j++)
{
// Created inside the loop, so every iteration works on its own DataSet instance.
DataSet dataSet = new DataSet();
string query = String.Format("SELECT * FROM [" + excelSheets[j] + "]");
.....
}

Convert datatable to datareader

For Performance improvement I want to convert datatable to datareader. I can not do that through query. So is there any other way to do so?
I know this is old, but the answers here seem to have missed the point of the OPs question.
DataTables have a method called CreateDataReader which will allow you to convert a DataTable to a DbDataReader object. In this case a DataTableReader.
// Start from an in-memory table...
var table = new DataTable();
// ...populate it here, e.g.:
//table = YourGetDataMethod();
// ...and ask the table itself for a reader over its rows.
var reader = table.CreateDataReader();
I should point out that this will not increase performance since you should be using one or the other.
Here are some more resources on the matter:
DataReader Vs DataTable
Is datareader quicker than dataset when populating a datatable?
For example
/// <summary>
/// Copies the current result set of a SqlDataReader into a new DataTable,
/// storing every value as its string representation.
/// </summary>
/// <param name="dataReader">Open reader positioned before the first row.</param>
/// <returns>
/// The filled table. If an exception occurs while copying, it is logged and
/// whatever was copied up to that point is returned.
/// </returns>
public DataTable ConvertDataReaderToDataTable(SqlDataReader dataReader)
{
    DataTable datatable = new DataTable();
    DataTable schemaTable = dataReader.GetSchemaTable();
    try
    {
        foreach (DataRow myRow in schemaTable.Rows)
        {
            DataColumn myDataColumn = new DataColumn();
            // Values are copied with ToString() below, so every column is a string
            // column. This used to be myRow.GetType(), which is the DataRow type
            // itself rather than a column data type.
            myDataColumn.DataType = typeof(string);
            myDataColumn.ColumnName = myRow["ColumnName"].ToString();
            datatable.Columns.Add(myDataColumn);
        }
        while (dataReader.Read())
        {
            DataRow myDataRow = datatable.NewRow();
            for (int i = 0; i < schemaTable.Rows.Count; i++)
            {
                myDataRow[i] = dataReader[i].ToString();
            }
            datatable.Rows.Add(myDataRow);
        }
        return datatable;
    }
    catch (Exception ex)
    {
        // Best effort: log the failure and hand back what was read so far.
        Error.Log(ex.ToString());
        return datatable;
    }
}
Use the DataTableReader constructor,
// Create the table, populate it, then hand it to the DataTableReader constructor.
var table = new DataTable();
//Fill table with data
var reader = new DataTableReader(table);
Good luck!
/// <summary>
/// Copies the current result set of a data reader into a new DataTable,
/// recreating each column's name, type, uniqueness, nullability and
/// read-only flag from the reader's schema table.
/// </summary>
/// <param name="_reader">Open reader to copy from; it is closed before this method returns.</param>
/// <returns>A table holding the reader's rows. Columns whose name repeats an earlier column are skipped.</returns>
public DataTable GetTable(IDataReader _reader)
{
    DataTable schema = _reader.GetSchemaTable();
    DataTable result = new DataTable();
    // Names of the columns that were actually added. A list rather than a
    // fixed-size array, so skipped duplicates do not leave null entries that
    // would later be used as column indexers.
    var columnNames = new System.Collections.Generic.List<string>(schema.Rows.Count);
    foreach (DataRow schemaRow in schema.Rows)
    {
        // The schema keys are written without trailing spaces ("ColumnName", not "ColumnName ").
        string name = schemaRow["ColumnName"].ToString();
        if (result.Columns.Contains(name))
        {
            continue;
        }
        DataColumn dataColumn = new DataColumn();
        dataColumn.ColumnName = name;
        dataColumn.Unique = Convert.ToBoolean(schemaRow["IsUnique"]);
        dataColumn.AllowDBNull = Convert.ToBoolean(schemaRow["AllowDBNull"]);
        dataColumn.ReadOnly = Convert.ToBoolean(schemaRow["IsReadOnly"]);
        dataColumn.DataType = (Type)schemaRow["DataType"];
        columnNames.Add(name);
        result.Columns.Add(dataColumn);
    }
    try
    {
        result.BeginLoadData();
        while (_reader.Read())
        {
            DataRow dataRow = result.NewRow();
            foreach (string name in columnNames)
            {
                dataRow[name] = _reader[name];
            }
            result.Rows.Add(dataRow);
        }
        result.EndLoadData();
    }
    finally
    {
        // Close the reader even when copying fails part-way through.
        _reader.Close();
    }
    return result;
}

The First Column of the excel file to put in string variable C#?

What I need is to take the first column of an Excel file and put that column into a string variable, with the numbers taken from Excel separated by blank spaces.
For Example:
Excel file:
1
3
4
56
76
7
876
23
43
(in column)
and string in C#
string number = "1 3 4 56 76 7 876 23 43"
Any code will help me
I have made the connection with excel file and i have created the dataset/datatable
but now i can not take those numbers as the sample above ?
If you have the data in a datset...you can do something like this....
////TRY THIS!!!
DataSet dsExcelContent = new DataSet();
//Fill from db
//
// Join the first column of every row with single spaces.
StringBuilder builder = new StringBuilder();
// Empty before the first value and " " afterwards, so the result has no
// trailing (or leading) space.
string separator = "";
foreach (DataRow row in dsExcelContent.Tables[0].Rows)
{
    builder.Append(separator);
    builder.Append(row[0].ToString());
    separator = " ";
}
Console.WriteLine(builder.ToString());
.....More exact to your code....
// Load [sheet1$] into a DataSet; the using blocks release the connection,
// command and adapter even when Open or Fill throws.
DataSet dsExcelContent = new DataSet();
using (OleDbConnection objConn = new OleDbConnection(sConnectionString))
using (OleDbCommand objCmdSelect = new OleDbCommand("SELECT * FROM [sheet1$]", objConn))
using (OleDbDataAdapter objAdapter1 = new OleDbDataAdapter(objCmdSelect))
{
    objConn.Open();
    objAdapter1.Fill(dsExcelContent);
}
// Bind the grid to the table that was actually filled. It used to be bound to
// a separate DataTable that never received any data.
dataGridView1.DataSource = dsExcelContent.Tables[0];
StringBuilder builder = new StringBuilder();
// Empty before the first value and " " afterwards, so the result has no trailing space.
string separator = "";
foreach (DataRow row in dsExcelContent.Tables[0].Rows)
{
    builder.Append(separator);
    builder.Append(row[0].ToString());
    separator = " ";
}
//s has the data you want.....
string s = builder.ToString();
//REST OF YOUR CODE.....
Have a link - Reading Excel Document
Here's what I would do...first open the spreadsheet with this code.
' Start Excel, open the workbook at filePath and take its first worksheet.
xlApp = New Excel.Application
' "FILENAME" is a placeholder for the workbook's path.
filePath = "FILENAME"
xlWorkBook = xlApp.Workbooks.Open(filePath)
xlWorkSheet = xlWorkBook.Worksheets(1)
Then read through the column like this
' Copy up to nine cells of column 1 into MyArray(0..8), stopping at the first empty cell.
For data As Integer = 0 To 8
    ' Excel's Cells collection is 1-based, so array slot `data` maps to worksheet row data + 1.
    Dim obj As Excel.Range = CType(xlWorkSheet.Cells(data + 1, 1), Range)
    If obj.Value IsNot Nothing Then
        MyArray(data) = obj.Value
    Else
        ' Exit For, not Exit Do: the enclosing loop is a For loop.
        Exit For
    End If
Next
You should then have all your data in MyArray; loop through it and build a string with the values separated by spaces, and you should be good. This code is in VB, but it should give you a good idea of the commands and structure to use.
/// <summary>
/// Loads [sheet1$] of unsorted.xls, shows it in the grid and, for each row,
/// shows the row's space-separated values in the label and its integer
/// values in the list box.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    // Verbatim string literal (@"..."); "" inside it is an escaped double quote.
    String sConnectionString = @"Provider=Microsoft.ACE.OLEDB.12.0; Data Source=unsorted.xls;Extended Properties=""Excel 12.0;HDR=NO;""";
    DataSet dsExcelContent = new DataSet();
    // The using blocks release the connection, command and adapter even when Open or Fill throws.
    using (OleDbConnection objConn = new OleDbConnection(sConnectionString))
    using (OleDbCommand objCmdSelect = new OleDbCommand("SELECT * FROM [sheet1$]", objConn))
    using (OleDbDataAdapter objAdapter1 = new OleDbDataAdapter(objCmdSelect))
    {
        objConn.Open();
        objAdapter1.Fill(dsExcelContent);
    }
    DataTable sheet = dsExcelContent.Tables[0];
    // Bind the grid to the table that was actually filled, not to an empty one.
    dataGridView1.DataSource = sheet;
    foreach (DataRow row in sheet.Rows)
    {
        StringBuilder builder = new StringBuilder();
        foreach (DataColumn col in sheet.Columns)
        {
            builder.Append(row[col].ToString());
            builder.Append(" ");
        }
        string s = builder.ToString();
        this.label1.Text = s;
        // Drop empty entries so the trailing separator (and empty cells) are not
        // reported as "'' is not a number!".
        string[] numbers = s.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        ArrayList numberList = new ArrayList();
        int i;
        foreach (String num in numbers)
        {
            if (Int32.TryParse(num, out i))
            {
                numberList.Add(i);
            }
            else
            {
                Console.WriteLine("'{0}' is not a number!", num);
            }
        }
        // Reassigned on every row, so the controls end up showing the last row.
        this.listBox1.DataSource = numberList;
    }
}
}
You just want to loop through the rows in the Dataset now:
// Join the first column of every row with single spaces.
var numberStr = string.Empty;
// Empty before the first value and " " afterwards. The original ternary used a
// string as its condition, which is not valid C#.
var separator = string.Empty;
foreach (DataRow dr in MyDataSet.Tables[0].Rows)
{
    numberStr += separator + dr[0].ToString();
    separator = " ";
}
Updated Solution
/// <summary>
/// Loads every table of unsorted.xls into a DataSet, shows the first one in
/// the grid, shows its first column as a space-separated string in the label
/// and the integers parsed from that string in the list box.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    String s = String.Empty;
    // Verbatim string literal (@"..."); the OLE DB keyword is "Data Source", with a space.
    String sConnectionString = @"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=unsorted.xls;Extended Properties=""Excel 12.0;HDR=NO;""";
    DataSet output = new DataSet();
    // The using block closes the connection, so no explicit Close() call is needed.
    using (OleDbConnection conn = new OleDbConnection(sConnectionString))
    {
        conn.Open();
        DataTable schemaTable = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" });
        foreach (DataRow schemaRow in schemaTable.Rows)
        {
            string sheet = schemaRow["TABLE_NAME"].ToString();
            DataTable outputTable = new DataTable(sheet);
            output.Tables.Add(outputTable);
            using (OleDbCommand cmd = new OleDbCommand("SELECT * FROM [" + sheet + "]", conn))
            using (OleDbDataAdapter adapter = new OleDbDataAdapter(cmd))
            {
                cmd.CommandType = CommandType.Text;
                adapter.Fill(outputTable);
            }
        }
    }
    // Use the tables that were just loaded. The previous version referred to
    // variables (MyDataSet, dsExcelContent1, objConn) that this method never declared.
    if (output.Tables.Count > 0)
    {
        DataTable firstSheet = output.Tables[0];
        dataGridView1.DataSource = firstSheet;
        // populate string with value from rows
        string separator = String.Empty;
        foreach (DataRow dr in firstSheet.Rows)
        {
            s += separator + dr[0].ToString();
            separator = " ";
        }
    }
    this.label1.Text = s;
    // Drop empty entries so an empty string or empty cell is not reported as a non-number.
    string[] numbers = s.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    ArrayList numberList = new ArrayList();
    int i;
    foreach (String num in numbers)
    {
        if (Int32.TryParse(num, out i))
        {
            numberList.Add(i);
        }
        else
        {
            Console.WriteLine("'{0}' is not a number!", num);
        }
    }
    this.listBox1.DataSource = numberList;
}

Categories

Resources