C# Read a particular value from CSV file [duplicate] - c#

This question already has answers here:
Reading CSV file and storing values into an array
(21 answers)
Closed 8 years ago.
I am a learner in C#. I want to read a particular value from the CSV file. I have learned the getting the csv file into a datatable through browsing. Please see the following code (Thanks to surendra jha) and my CSV file format. Say, I want to get what is the 'Volume' for 'ID' = 90.
CSV file
ID:Volume:Name
100:5600:A
95:5000:B
90:4500:C
85:4000:D
Code for getting all the values:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Data;
namespace DVHConsolePrj
{
class Program
{
static void Main(string[] args)
{
readCsvFileData();
}
static void readCsvFileData()
{
string path = #"C:\IDVolumeName.txt";
StreamReader streamreader = new StreamReader(path);
DataTable datatable = new DataTable();
int rowcount = 0;
string[] columnname = null;
string[] streamdatavalue = null;
while (!streamreader.EndOfStream)
{
string streamrowdata = streamreader.ReadLine().Trim();
if (streamrowdata.Length > 0)
{
streamdatavalue = streamrowdata.Split(':');
if (rowcount == 0)
{
rowcount = 1;
columnname = streamdatavalue;
foreach (string csvheader in columnname)
{
DataColumn datacolumn = new DataColumn(csvheader.ToUpper(), typeof(string));
datacolumn.DefaultValue = string.Empty;
datatable.Columns.Add(datacolumn);
}
}
else
{
DataRow datarow = datatable.NewRow();
for (int i = 0; i < columnname.Length; i++)
{
datarow[columnname[i]] = streamdatavalue[i] == null ? string.Empty : streamdatavalue[i].ToString();
}
datatable.Rows.Add(datarow);
}
}
}
streamreader.Close();
streamreader.Dispose();
foreach (DataRow dr in datatable.Rows)
{
string rowvalues = string.Empty;
foreach (string csvcolumns in columnname)
{
rowvalues += csvcolumns + "=" + dr[csvcolumns].ToString() + " ";
}
Console.WriteLine(rowvalues);
}
Console.ReadLine();
}
}
}

Instead of parsing the file manually in a DataTable, then doing some Linq, use Linq directly on it, using this library.
It works pretty well and is very efficient with big files.
For instance.
1) Add nuget package in your project, and the following line to be able to use it:
using LINQtoCSV;
2) define the class that olds the data
public class IdVolumeNameRow
{
[CsvColumn(FieldIndex = 1)]
public string ID { get; set; }
[CsvColumn(FieldIndex = 2)]
public decimal Volume { get; set; }
[CsvColumn(FieldIndex = 3)]
public string Name{ get; set; }
}
3) and search for the value
var csvAttributes = new CsvFileDescription
{
SeparatorChar = ':',
FirstLineHasColumnNames = true
};
var cc = new CsvContext();
var volume = cc.Read<IdVolumeNameRow>(#"C:\IDVolumeName.txt", csvAttributes)
.Where(i => i.ID == "90")
.Select(i => i.Volume)
.FirstOrDefault();

public DataTable CSVToDataTable(string filename, string separator)
{
try
{
FileInfo file = new FileInfo(filename);
OleDbConnection con =
new OleDbConnection("Provider=Microsoft.Jet.OLEDB.4.0;Data Source=\"" +
file.DirectoryName + "\";
Extended Properties='text;HDR=Yes;FMT=Delimited(" + separator + ")';")
OleDbCommand cmd = new OleDbCommand(string.Format
("SELECT * FROM [{0}]", file.Name), con);
con.Open();
DataTable tbl = new DataTable();
using (OleDbDataAdapter adp = new OleDbDataAdapter(cmd))
{
tbl = new DataTable("MyTable");
adp.Fill(tbl);
}
return tbl;
}
catch(Exception ex)
{
throw ex;
}
finally()
{
con.Close();
}
}
You can try this code, it is build on the fly, it is possible little errors to exist. Check OleDbConnection. When you return the DataTable you can search in the table using LINQ.
var results = from myRow in myDataTable.AsEnumerable()
where myRow.Field<int>("ID") == 90
select myRow;
Here you can take the row with ID=90 !

For filtering DataTable you can use DataTable.Select method like this
var filtered = dataTable.Select("ID = '90'");
filtered above is array of datarow that suitable for the condition, so for get value from first filtered row you can use something like
if(filtered.Length>0){
var Volume = filtered[0]["VOLUME"];
}

Related

Consecutively reading data from Excel in C#/NUnit test cases, returns blank data for 2nd Case

I'm trying to write a parameterized NUnit test that executes twice. Each time it runs, it references a different row in a spreadsheet and gets the username and password based on int rowNum.
class Test
{
//Run the test twice
[Test,TestCase(1),TestCase(2)]
public void T101_LoginTestSuite_Valid(int rowNum)
{
Console.WriteLine(TestContext.CurrentContext.Test.MethodName); //Test Name
Console.WriteLine("Row number "+rowNum);// Value of rowNum
ExcelDataFactory.GetTestDataSet(TestContext.CurrentContext.Test.MethodName);
//Print out the credentials
Console.WriteLine(ExcelDataFactory.ReadData(rowNum,"username"));
Console.WriteLine(ExcelDataFactory.ReadData(rowNum, "password"));
}
}
Here is the excel
The first test case gets username and password correctly.
However the second test case returns blank (If I run this individually it will work!)
Below is the ExcelDataFactory code:
class ExcelDataFactory
{
//Get data from excel
private static DataTable ExcelToDataTable(String filename, String sheetName)
{
//Open file and returns as Stream
FileStream stream = File.Open(filename, FileMode.Open, FileAccess.Read);
//CreateOpenXmlReader via ExcelReaderFactory
IExcelDataReader excelReader = ExcelReaderFactory.CreateOpenXmlReader(stream); //.xlsx
//Return as DataSet and set the frist row as column name
DataSet result = excelReader.AsDataSet(new ExcelDataSetConfiguration()
{
ConfigureDataTable = (_) => new ExcelDataTableConfiguration()
{
UseHeaderRow = true
}
});
DataTableCollection table = result.Tables;
DataTable resultTable = table[sheetName];
//Close FileStream
stream.Close();
//Return
return resultTable;
}
//Put data into a collection
static List<DataCollection> dataCollection = new List<DataCollection>();
public static void PopulateInCollection(string fileName, String sheetName)
{
DataTable table = ExcelToDataTable(fileName,sheetName);
//Iterate through the rows and columns of the Table
for(int row = 1; row <= table.Rows.Count; row++)
{
for (int column = 0; column < table.Columns.Count; column++)
{
DataCollection dataTable = new DataCollection()
{
rowNumber = row,
columnName = table.Columns[column].ColumnName,
columnValue = table.Rows[row - 1][column].ToString()
};
//Add all the details for each row
dataCollection.Add(dataTable);
}
}
}
//Find the correct excel file and sheet
public static void GetTestDataSet(String testName)
{
String[] testNameSplit = testName.Split('_');
String filePath = MyProps.Default.TestData //Add path
+ testNameSplit[1]
+ "."
+ "xlsx";
PopulateInCollection(filePath, testNameSplit[0]);
}
public static string ReadData(int rowNumber, string columnName)
{
try
{
//Retriving Data using LINQ to reduce amount of iterations
string data = (from collectionData in dataCollection
where collectionData.columnName == columnName && collectionData.rowNumber == rowNumber
select collectionData.columnValue).SingleOrDefault();
//var data = dataCollection.Where(collectionData => collectionData.columnName == columnName && collectionData.rowNumber == rowNumber).SingleOrDefault().columnValue;
return data.ToString();
}
catch (Exception e)
{
e.StackTrace.ToString();
return null;
}
}
}
class DataCollection
{
public int rowNumber { get; set; }
public string columnName { get; set; }
public string columnValue { get; set; }
}
I suspect that the ExcelDataFactory.GetTestDataSet method is called in the wrong place, but I really am stumped as to why this is happening. Any ideas would be greatly appreciated.
I did some quick changes to ExcelDataFactory class, I removed the static references and now PopulateInCollection method returns a List that is declared and initialized at the start of the class.
using ExcelDataReader;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Wizuda_Selenium_Test_Automation
{
class ExcelDataFactory
{
List<DataCollection> dataCollection = new List<DataCollection>();
private static DataTable ExcelToDataTable(String filename, String sheetName)
{
//Open file and returns as Stream
FileStream stream = File.Open(filename, FileMode.Open, FileAccess.Read);
//CreateOpenXmlReader via ExcelReaderFactory
IExcelDataReader excelReader = ExcelReaderFactory.CreateOpenXmlReader(stream); //.xlsx
//Return as DataSet and set the frist row as column name
DataSet result = excelReader.AsDataSet(new ExcelDataSetConfiguration()
{
ConfigureDataTable = (_) => new ExcelDataTableConfiguration()
{
UseHeaderRow = true
}
});
DataTableCollection table = result.Tables;
DataTable resultTable = table[sheetName];
//Close FileStream
stream.Close();
//Return
return resultTable;
}
//static List<DataCollection> dataCollection = new List<DataCollection>();
public List<DataCollection> PopulateInCollection(string fileName, String sheetName)
{
dataCollection = new List<DataCollection>();
DataTable table = ExcelToDataTable(fileName,sheetName);
//Iterate through the rows and columns of the Table
for(int row = 1; row <= table.Rows.Count; row++)
{
for (int column = 0; column < table.Columns.Count; column++)
{
DataCollection dataTable = new DataCollection()
{
rowNumber = row,
columnName = table.Columns[column].ColumnName,
columnValue = table.Rows[row - 1][column].ToString()
};
//Add all the details for each row
dataCollection.Add(dataTable);
}
}
return dataCollection;
}
public string ReadData(int rowNumber, string columnName)
{
try
{
//Retriving Data using LINQ to reduce amount of iterations
string data = (from collectionData in dataCollection
where collectionData.columnName == columnName && collectionData.rowNumber == rowNumber
select collectionData.columnValue).SingleOrDefault();
//var data = dataCollection.Where(collectionData => collectionData.columnName == columnName && collectionData.rowNumber == rowNumber).SingleOrDefault().columnValue;
return data.ToString();
}
catch (Exception e)
{
e.StackTrace.ToString();
return null;
}
}
public void GetTestDataSet(String testName)
{
String[] testNameSplit = testName.Split('_');
String filePath = MyProps.Default.TestData //Add path
+ testNameSplit[1] //LoginTestSuite
+ "."
+ "xlsx"; //T101
PopulateInCollection(filePath, testNameSplit[0]);
}
}
class DataCollection
{
public int rowNumber { get; set; }
public string columnName { get; set; }
public string columnValue { get; set; }
}
}
I updated the test to create new instance of ExcelDataFactory
[Test,TestCase(1),TestCase(2)]
public void T101_LoginTestSuite_Valid(int rowNum)
{
ExcelDataFactory excelDataFactory = new ExcelDataFactory();
Console.WriteLine(TestContext.CurrentContext.Test.MethodName);
Console.WriteLine("Row number "+rowNum);
excelDataFactory.GetTestDataSet(TestContext.CurrentContext.Test.MethodName);
Console.WriteLine("username= "+ excelDataFactory.ReadData(rowNum,"username"));
Console.WriteLine("password= "+ excelDataFactory.ReadData(rowNum, "password"));
}
And now the test passes
I guess I need to go back and re-learn about the use static methods, thanks Kritner

Difficulties with C# + Excel

I have a problem.
I have an Excel File where sometimes the same customer is in 2 rows.
But not always.
Its like:
Click
Now, i want to create a DataGrid in C# which can list this in one row like:
Click
I know it would be easier to change the Excel file, but assume it wouldnt work that way(we get the file like this and we cant change it)
I know i could too just make another Excel File and make it with Excel(already did it this way, but we would need it more as C#)
Now i am at this point:
private void button2_Click(object sender, EventArgs e)
{
OpenFileDialog ofd = new OpenFileDialog() { Filter = "Excel Arbeitsmappe |*.xlsx", ValidateNames = true };
if (ofd.ShowDialog() == DialogResult.OK)
textBox1.Text = ofd.SafeFileName;
Excel.Application excelApp = new Excel.Application();
excelApp.Visible = false;
string filename = ofd.FileName;
Excel.Workbook workbook = excelApp.Workbooks.Open(filename);
Excel.Worksheet worksheet = workbook.Worksheets[1];
dataGridView1.ColumnCount = 2;
dataGridView1.Columns[0].Name = "Number";
dataGridView1.Columns[1].Name = "Street";
int rows = worksheet.UsedRange.Rows.Count;
for (int i = 2; i <= rows; i++)
{
string combinehr = worksheet.Cells[i, 150].Text + worksheet.Cells[i, 151].Text;
dataGridView1.Rows.Add(worksheet.Cells[i,29].Text, combinehr);
}
}
How do i expand it that it works like i want?
I would be so grateful
(sorry for the english)
With a reference to ExcelDataReader, ExcelDataReader.DataSet and DataSetExtensions (.Net) you can read the Excel file pretty easy into a DataSet, then you just have to work with the logic:
Input file:
using System;
using System.Data;
using System.IO;
using System.Linq;
using ExcelDataReader;
public DataTable GetTableFromExcel(string filePath)
{
DataSet ds = new DataSet();
using (var stream = System.IO.File.Open(filePath, System.IO.FileMode.Open, System.IO.FileAccess.Read))
{
using (var reader = ExcelReaderFactory.CreateReader(stream))
{
ds = reader.AsDataSet();
}
}
DataTable table = new DataTable();
table.Columns.Add(new DataColumn("CustomerNr"));
table.Columns.Add(new DataColumn("Address"));
// Set column names
for (int i = 0; i < ds.Tables[0].Columns.Count; i++)
{
// DataColumn.ColumnName can't be empty when DataColumn is part
// of a DataTable (throws ArgumentException)
string columnName = ds.Tables[0].Rows[0][i].ToString();
if (string.IsNullOrWhiteSpace(columnName))
{
columnName = $"Column{i}";
}
ds.Tables[0].Columns[i].ColumnName = columnName;
}
// Remove the first row containing the headers
ds.Tables[0].Rows.Remove(ds.Tables[0].Rows[0]);
// I don't have the benchmarks with me right now, but I've tested
// DataTable.Select vs DataTable.AsEnumerable.Select many times
// and the AsEnumerable method its faster, that's why you need the
// reference to System.Data.DataSetExtensions
var enumerableTable = ds.Tables[0].AsEnumerable();
// list of unique products
var products = enumerableTable.Select(row => row.Field<string>("Product")).Distinct();
// Add a column for each product
foreach (string product in products)
{
table.Columns.Add(new DataColumn(product));
}
// list of unique customers
var customerNumbers = enumerableTable.Select(row => row.Field<double>("CustomerNr")).Distinct();
foreach (var customerNumber in customerNumbers)
{
DataRow record = table.NewRow();
record["CustomerNr"] = customerNumber;
record["Address"] = enumerableTable.First(row => row.Field<double>("CustomerNr").Equals(customerNumber))[1];
for (int i = 2; i < table.Columns.Count; i++)
{
DataRow product = enumerableTable.FirstOrDefault(row => row.Field<double>("CustomerNr").Equals(customerNumber)
&& row.Field<string>("Product").Equals(table.Columns[i].ColumnName));
// Quantity = 0 if product is null
record[i] = product?["Quantity"] ?? 0;
}
table.Rows.Add(record);
}
return table;
}
Result DataTable:
The same result as #IvanGarcíaTopete via Microsoft.Office.Interop.Excel.
Class ExcelModel for Excel data:
public class ExcelModel
{
public string CustomerNr { get; set; }
public string Address { get; set; }
public string Product { get; set; }
public string Quantity { get; set; }
}
Read Excel and fill out model:
private void OpenReadExcel()
{
var dlg = new OpenFileDialog();
if (dlg.ShowDialog() != DialogResult.OK) return;
var exApp = new Microsoft.Office.Interop.Excel.Application();
Workbook exWbk = exApp.Workbooks.Open(dlg.FileName);
Worksheet wSh = exWbk.Sheets[1];
int k = 2;
Customers.Clear();
while (wSh.Cells[k, 1].Text != "" && wSh.Cells[k, 1].Value != null)
{
var rowExcelModel = new ExcelModel()
{
CustomerNr = wSh.Cells[k, 1].Text,
Address = wSh.Cells[k, 2].Text,
Product = wSh.Cells[k, 3].Text,
Quantity = wSh.Cells[k, 4].Text
};
Customers.Add(rowExcelModel);
k++;
}
exApp.Quit();
}
Generate Data table:
private void GenerateDataTable()
{
// unique products and customers
var products = Customers.Select(x => x.Product).Distinct().ToList();
var customers = Customers.Select(x => x.CustomerNr).Distinct().ToList();
// columns CustomerNr and Address
var dataTable = new System.Data.DataTable();
dataTable.Columns.Add(new DataColumn("CustomerNr"));
dataTable.Columns.Add(new DataColumn("Address"));
// columns for each product
foreach (var product in products)
{
dataTable.Columns.Add(new DataColumn(product));
}
//fill rows for each customers
foreach (var customer in customers)
{
var row = dataTable.NewRow();
row["CustomerNr"] = customer;
row["Address"] = Customers.Where(x => x.CustomerNr == customer).Select(x => x.Address).FirstOrDefault();
foreach (var product in products)
{
var quantity = Customers.Where(x => x.CustomerNr == customer && x.Product == product)
.Select(x => x.Quantity).FirstOrDefault();
row[product] = quantity ?? "0";
}
dataTable.Rows.Add(row);
}
dataGridView1.DataSource = dataTable;
}

Export DataGrid to text file

I'm new to programming (1st year of learning at college) and I'm working on a small application.
I have a window where user can retrieve data from SQL to DataGrid and a Button for exporting some data from a DataGrid data to a text file.
This is the code I've used to get data from SQL:
SqlConnection con = new SqlConnection("Server = localhost;Database = autoser; Integrated Security = true");
SqlCommand cmd = new SqlCommand("selectproduct", con); // Using a Store Procedure.
cmd.CommandType = CommandType.StoredProcedure;
DataTable dt = new DataTable("dtList");
cmd.Parameters.AddWithValue("#Code", txtbarcode.Text);
SqlDataAdapter da = new SqlDataAdapter(cmd);
da.Fill(dt);
data.ItemsSource = dt.DefaultView;
SqlDataAdapter adapt = new SqlDataAdapter(cmd);
DataSet ds = new DataSet();
adapt.Fill(ds);
con.Close();
int count = ds.Tables[0].Rows.Count;
if (count == 0)
{
MessageBox.Show("This product doesn't excist");
SystemSounds.Hand.Play();
}
else if (count == 1)
{
lblinfo.Visibility = Visibility.Visible;
SystemSounds.Asterisk.Play();
}
And this one is the code I used to write text file:
{
using (StreamWriter writer = new StreamWriter("D:\\test.txt", true))
{
writer.WriteLine("Welcome");
writer.WriteLine("E N T E R N E T");
}
using (StreamWriter writer = new StreamWriter("D:\\test.txt", true))
{
writer.WriteLine(data.Items);
}
using (StreamWriter writer = new StreamWriter("D:\\test.txt", true))
{
writer.WriteLine(data.Items);
}
// Append line to the file.
using (StreamWriter writer = new StreamWriter("D:\\test.txt", true))
{
writer.WriteLine("---------------------------------------");
writer.WriteLine(" Thank You! ");
writer.WriteLine(" " + DateTime.Now + " ");
}
}
When I Open the text file i get this data
Welcome
E N T E R N E T
System.Windows.Controls.ItemCollection - Why isn't show the data grid
data
---------------------------------------
Thank You
7/26/2018 12:38:37 PM
My question is: Where is my mistake that cause the data from the DataGrid to don't be showed in correct way?
Thanks in advance
You are using currently the following overload of the WriteLine method:
public virtual void WriteLine(object value)
If you look at the documentation of StreamWriter.WriteLine(object) it says that it:
Writes the text representation of an object by calling the ToString method on that object, followed by a line terminator to the text string or stream.
This is the reason why you get the following nice line in your file:
System.Windows.Controls.ItemCollection
The documentation of Object.ToString() method reveals that the
default implementations of the Object.ToString method return the fully qualified name of the object's type.
You would need to iterate through the collection and write each entry separately into the file. I would also suggest to use directly the data source instead of writing from the DataGrid.
foreach (DataRow row in dt.Rows)
{
object[] array = row.ItemArray;
writer.WriteLine(string.Join(" | ", array));
}
This is because data.Items is an ItemCollection and not a string.
All objects return the output of their ToString method when are asked to represent their contents as string. Normally you would override this method but in this case you can't.
So you need to tell the compiler how to retrieve the representative information from that collection. You can use either of these queries to fetch desired information out of the data grid:
var items = data.Items.AsQueryable().Cast<MyItemDataType>().Select(x => x.MyProperty);
var items = data.ItemsSource.Cast<MyItemDataType>().Select(x => x.MyProperty);
var items = data.Items.SourceCollection.AsQueryable().Cast<MyItemDataType>().Select(x => x.MyProperty);
items is a collection so you need to convert it to a string:
var text = items.Aggregate((x,y)=> x+", "+y);
MyItemDataType differs in each query and you have to find out yourself which data type is being used and MyProperty is the property in that class which represents the text of a row.
Edit
You can use this code too. It does the same thing:
string text = "";
for (int i = 0; i < data.Items.Count; i++)
{
text += data.Items[i].ToString();
if(i < data.Items.Count - 1)
text += ", ";
}
writer.WriteLine(text);
But pay attention to the data type of each item in data.Items[i].ToString(). For example if each item is of type int then data.Items[i].ToString() returns a string representing the value of that integer (e.g. 1 turns into "1") but if they are of other types (e.g. such as Customer or MyDataGridItem) you need to override ToString() method of that class to look something like this:
public class Customer{
//...
public override string ToString(){
return this.Id + " " + this.Name;
}
}
so if you cannot override this method for any reason you need to do the other approach:
string text = "";
for (int i = 0; i < data.Items.Count; i++)
{
Customer customer = data.Items[i] as Customer;//cast is required since type of Items[i] is object
text += (customer.Id + " " + customer.Name);
if(i < data.Items.Count - 1)
text += ", ";
}
writer.WriteLine(text);
furthermore, you can use a StringBuilder to speed up the string concatenation because += is slow on strings.
Look at the sample code here. This will do what you want.
public static void WriteDataToFile(DataTable submittedDataTable, string submittedFilePath)
{
int i = 0;
StreamWriter sw = null;
sw = new StreamWriter(submittedFilePath, false);
for (i = 0; i < submittedDataTable.Columns.Count - 1; i++)
{
sw.Write(submittedDataTable.Columns[i].ColumnName + ";");
}
sw.Write(submittedDataTable.Columns[i].ColumnName);
sw.WriteLine();
foreach (DataRow row in submittedDataTable.Rows)
{
object[] array = row.ItemArray;
for (i = 0; i < array.Length - 1; i++)
{
sw.Write(array[i].ToString() + ";");
}
sw.Write(array[i].ToString());
sw.WriteLine();
}
sw.Close();
}
Also, take a look at this.
using System;
using System.Web;
using System.IO;
using System.Data;
namespace WebApplication1
{
public partial class WebForm1 : System.Web.UI.Page
{
protected void Button1_Click(object sender, EventArgs e)
{
StreamWriter swExtLogFile = new StreamWriter("D:/Log/log.txt",true);
DataTable dt = new DataTable();
//Adding data To DataTable
dt.Columns.Add("ID");
dt.Columns.Add("Name");
dt.Columns.Add("Address");
dt.Rows.Add(1, "venki","Chennai");
dt.Rows.Add(2, "Hanu","London");
dt.Rows.Add(3, "john","Swiss");
int i;
swExtLogFile.Write(Environment.NewLine);
foreach (DataRow row in dt.Rows)
{
object[] array = row.ItemArray;
for (i = 0; i < array.Length - 1; i++)
{
swExtLogFile.Write(array[i].ToString() + " | ");
}
swExtLogFile.WriteLine(array[i].ToString());
}
swExtLogFile.Write("*****END OF DATA****"+DateTime.Now.ToString());
swExtLogFile.Flush();
swExtLogFile.Close();
}
}
}

C# Reading CSV to DataTable and Invoke Rows/Columns

i am currently working on a small Project and i got stuck with a Problem i currently can not manage to solve...
I have multiple ".CSV" Files i want to read, they all have the same Data just with different Values.
Header1;Value1;Info1
Header2;Value2;Info2
Header3;Value3;Info3
While reading the first File i Need to Create the Headers. The Problem is they are not splited in Columns but in rows (as you can see above Header1-Header3).
Then it Needs to read the Value 1 - Value 3 (they are listed in the 2nd Column) and on top of that i Need to create another Header -> Header4 with the data of "Info2" which is always placed in Column 3 and Row 2 (the other values of Column 3 i can ignore).
So the Outcome after the first File should look like this:
Header1;Header2;Header3;Header4;
Value1;Value2;Value3;Info2;
And after multiple files it sohuld be like this:
Header1;Header2;Header3;Header4;
Value1;Value2;Value3;Value4;
Value1b;Value2b;Value3b;Value4b;
Value1c;Value2c;Value3c;Value4c;
I tried it with OleDB but i get the Error "missing ISAM" which i cant mange to fix. The Code i Used is the following:
public DataTable ReadCsv(string fileName)
{
DataTable dt = new DataTable("Data");
/* using (OleDbConnection cn = new OleDbConnection("Provider=Microsoft.Jet.OLEDB.4.0;Data Source=\"" +
Path.GetDirectoryName(fileName) + "\";Extendet Properties ='text;HDR=yes;FMT=Delimited(,)';"))
*/
using (OleDbConnection cn = new OleDbConnection("Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" +
Path.GetDirectoryName(fileName) + ";Extendet Properties ='text;HDR=yes;FMT=Delimited(,)';"))
{
using(OleDbCommand cmd = new OleDbCommand(string.Format("select *from [{0}]", new FileInfo(fileName).Name,cn)))
{
cn.Open();
using(OleDbDataAdapter adapter = new OleDbDataAdapter(cmd))
{
adapter.Fill(dt);
}
}
}
return dt;
}
Another attempt i did was using StreamReader. But the Headers are in the wrong place and i dont know how to Change this + do this for every file. the Code i tried is the following:
public static DataTable ReadCsvFilee(string path)
{
DataTable oDataTable = new DataTable();
var fileNames = Directory.GetFiles(path);
foreach (var fileName in fileNames)
{
//initialising a StreamReader type variable and will pass the file location
StreamReader oStreamReader = new StreamReader(fileName);
// CONTROLS WHETHER WE SKIP A ROW OR NOT
int RowCount = 0;
// CONTROLS WHETHER WE CREATE COLUMNS OR NOT
bool hasColumns = false;
string[] ColumnNames = null;
string[] oStreamDataValues = null;
//using while loop read the stream data till end
while (!oStreamReader.EndOfStream)
{
String oStreamRowData = oStreamReader.ReadLine().Trim();
if (oStreamRowData.Length > 0)
{
oStreamDataValues = oStreamRowData.Split(';');
//Bcoz the first row contains column names, we will poluate
//the column name by
//reading the first row and RowCount-0 will be true only once
// CHANGE TO CHECK FOR COLUMNS CREATED
if (!hasColumns)
{
ColumnNames = oStreamRowData.Split(';');
//using foreach looping through all the column names
foreach (string csvcolumn in ColumnNames)
{
DataColumn oDataColumn = new DataColumn(csvcolumn.ToUpper(), typeof(string));
//setting the default value of empty.string to newly created column
oDataColumn.DefaultValue = string.Empty;
//adding the newly created column to the table
oDataTable.Columns.Add(oDataColumn);
}
// SET COLUMNS CREATED
hasColumns = true;
// SET RowCount TO 0 SO WE KNOW TO SKIP COLUMNS LINE
RowCount = 0;
}
else
{
// IF RowCount IS 0 THEN SKIP COLUMN LINE
if (RowCount++ == 0) continue;
//creates a new DataRow with the same schema as of the oDataTable
DataRow oDataRow = oDataTable.NewRow();
//using foreach looping through all the column names
for (int i = 0; i < ColumnNames.Length; i++)
{
oDataRow[ColumnNames[i]] = oStreamDataValues[i] == null ? string.Empty : oStreamDataValues[i].ToString();
}
//adding the newly created row with data to the oDataTable
oDataTable.Rows.Add(oDataRow);
}
}
}
//close the oStreamReader object
oStreamReader.Close();
//release all the resources used by the oStreamReader object
oStreamReader.Dispose();
}
return oDataTable;
}
I am thankful for everyone who is willing to help. And Thanks for reading this far!
Sincerely yours
If I understood you right, there is a strict parsing there like this:
string OpenAndParse(string filename, bool firstFile=false)
{
var lines = File.ReadAllLines(filename);
var parsed = lines.Select(l => l.Split(';')).ToArray();
var header = $"{parsed[0][0]};{parsed[1][0]};{parsed[2][0]};{parsed[1][0]}\n";
var data = $"{parsed[0][1]};{parsed[1][1]};{parsed[2][1]};{parsed[1][2]}\n";
return firstFile
? $"{header}{data}"
: $"{data}";
}
Where it would return - if first file:
Header1;Header2;Header3;Header2
Value1;Value2;Value3;Value4
if not first file:
Value1;Value2;Value3;Value4
If I am correct, rest is about running this against a list file of files and joining the results in an output file.
EDIT: Against a directory:
void ProcessFiles(string folderName, string outputFileName)
{
bool firstFile = true;
foreach (var f in Directory.GetFiles(folderName))
{
File.AppendAllText(outputFileName, OpenAndParse(f, firstFile));
firstFile = false;
}
}
Note: I missed you want a DataTable and not an output file. Then you could simply create a list and put the results into that list making the list the datasource for your datatable (then why would you use semicolons in there? Probably all you need is to simply attach the array values to a list).
(Adding as another answer just to make it uncluttered)
void ProcessMyFiles(string folderName)
{
List<MyData> d = new List<MyData>();
var files = Directory.GetFiles(folderName);
foreach (var file in files)
{
OpenAndParse(file, d);
}
string[] headers = GetHeaders(files[0]);
DataGridView dgv = new DataGridView {Dock=DockStyle.Fill};
dgv.DataSource = d;
dgv.ColumnAdded += (sender, e) => {e.Column.HeaderText = headers[e.Column.Index];};
Form f = new Form();
f.Controls.Add(dgv);
f.Show();
}
string[] GetHeaders(string filename)
{
var lines = File.ReadAllLines(filename);
var parsed = lines.Select(l => l.Split(';')).ToArray();
return new string[] { parsed[0][0], parsed[1][0], parsed[2][0], parsed[1][0] };
}
void OpenAndParse(string filename, List<MyData> d)
{
var lines = File.ReadAllLines(filename);
var parsed = lines.Select(l => l.Split(';')).ToArray();
var data = new MyData
{
Col1 = parsed[0][1],
Col2 = parsed[1][1],
Col3 = parsed[2][1],
Col4 = parsed[1][2]
};
d.Add(data);
}
public class MyData
{
public string Col1 { get; set; }
public string Col2 { get; set; }
public string Col3 { get; set; }
public string Col4 { get; set; }
}
I don't know if this is the best way to do this. But what i would have done in your case, is to rewrite the CSV's the conventionnal way while reading all the files, then create a stream containing the new CSV created.
It would look like something like this :
var csv = new StringBuilder();
csv.AppendLine("Header1;Header2;Header3;Header4");
foreach (var item in file)
{
var newLine = string.Format("{0},{1},{2},{3}", item.value1, item.value2, item.value3, item.value4);
csv.AppendLine(newLine);
}
//Create Stream
MemoryStream stream = new MemoryStream();
StreamReader reader = new StreamReader(stream);
//Fill your data table here with your values
Hope this will help.

Code performance (bulk copy/ reading multiple lines and saving in database from text file)

I am learning c# programming these days and need some help in determining the performance of code.
I have to read a file and some details from it.
File has 4 columns:
ID, dob, size, accountno.
Problem:I have to read every line and insert them into a database and there are more than 50000 entries per day.
Solution I tried:
Created a class with 4 properties (ID, dob, size, accountno.) and then I iterate through the file and convert all the data into objects and keep on adding them on ArraList. So, basically now I got an arraylist with 50000 objects.
Now, I iterate through the array at last and inserted the detail in database.
Is this correct approach ?
Experts please help.
code :
namespace testing
{
class Program
{
static void Main(string[] args)
{
string timestamp = DateTime.Now.ToString("yyyyMMddHHmmss");
string InputDirectory = #"My Documents\\2015";
string FileMask = "comb*.txt";
ArrayList al = new ArrayList();
string line;
var Files = Directory.GetFiles(InputDirectory, FileMask, SearchOption.AllDirectories).Select(f => Path.GetFullPath(f));
foreach (var f in Files)
{
using (StreamReader reader = new StreamReader(f))
{
string date;
while ((line = reader.ReadLine()) != null)
{
Datamodel dm = new Datamodel();
string[] values = line.Split(',').Select(sValue => sValue.Trim()).ToArray();
dm.ID = values[0].ToString();
dm.dob= dm.RPT_ID.Remove(0, 4);
dm.size= values[1].ToString();
dm.accountno= values[2].ToString();
al.Add(dm);
}
reader.Close();
}
}
utilityClass.Insert_Entry(al);
}
}
}
For additional SQL performance look into transactions:
connection.BeginTransaction();
//bulk insert commands here
connection.Commit();
My Solution :Thanks to all above comments.
namespace Test
{
class Program
{
static void Main(string[] args)
{
string timestamp = DateTime.Now.ToString("yyyyMMddHHmmss");
string InputDirectory = #"My Documents\\2015";
string FileMask = "comb*.txt";
try
{
string line = null;
var Files = Directory.GetFiles(InputDirectory, FileMask, SearchOption.AllDirectories).Select(f => Path.GetFullPath(f));
foreach (var f in Files)
{
DataTable table = new DataTable();
table.TableName = f;
table.Columns.Add("ID", typeof(Int64));
table.Columns.Add("dob", typeof(string));
table.Columns.Add("size", typeof(string));
table.Columns.Add("accountno", typeof(string));
using (StreamReader reader = new StreamReader(f))
{
while ((line = reader.ReadLine()) != null)
{
string[] values = line.Split(',').Select(sValue => sValue.Trim()).ToArray();
string uniqueGuid = SequentialGuidGenerator.NewGuid().ToString();
uniqueGuid = uniqueGuid.Replace("-", "");
int ID = convert.toint(values[0]);
string NOTIF_ID = "";
table.Rows.Add(ID,values[1].ToString(),values[2]).toString(),values[2]).toString());
}
reader.Close();
}
utilityClass.Insert_Entry(table, env);
}
}
catch (Exception e)
{
CustomException.Write(CustomException.CreateExceptionString(e));
}
}
}
}
Insert_Entry
using (SqlConnection con = new SqlConnection(utilityClass.GetConnectionString(environ)))
{
con.Open();
using (SqlBulkCopy bulkCopy = new SqlBulkCopy(con))
{
bulkCopy.DestinationTableName = "dbo.inserttablename";
try
{
bulkCopy.WriteToServer(mfsentdata);
}
catch (SqlException e)
{
CustomException.Write(CustomException.CreateExceptionString(e, mfsentdata.TableName));
}
}
con.Close();
}
}

Categories

Resources