Fastest method to write a DataSet to a character delimited file - c#

This is the fastest method I've found so far to retrieve a response recordset from an Oracle DB and write it out to a delimited file. Faster would be even better. Please offer suggestions.
Retrieving the result set:
using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
{
oracleConnection.Open();
try
{
using (var oracleCommand = new OracleCommand(extractToRun, OracleConnection))
{
oracleCommand.CommandType = CommandType.StoredProcedure;
oracleCommand.BindByName = true;
oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
oracleCommand.InitialLONGFetchSize = 5000;
oracleCommand.Parameters.Add(refCursorOracleParameter);
oracleCommand.Parameters.Add(startDateOracleParameter);
oracleCommand.Parameters.Add(endDateOracleParameter);
oracleCommand.Parameters.Add(jobIdOracleParameter);
using (var oracleDataAdapter = new OracleDataAdapter(oracleCommand))
{
oracleDataAdapter.Fill(ds);
return ds;
}
}
}
finally
{
oracleConnection.Close();
oracleConnection.Dispose();
}
}
Processing the data and writing it out to the file:
public static void ExportDataTableToDelimitedFile(DataTable table, string filename, string encloseWith, string delimiter, bool includeHeader, string fieldsToExclude, bool fixedLengthValues)
{
String excludeList = String.Empty;
if (!String.IsNullOrEmpty(fieldsToExclude))
{
excludeList = fieldsToExclude.ToUpper();
}
using (FileStream fs = new FileStream(filename, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, 131072, FileOptions.None))
{
BinaryWriter sw = new BinaryWriter(fs);
if (table.Rows.Count == 0)
{
sw.Write(String.Empty);
sw.Close();
sw.Dispose();
return;
}
//Handle header
if (includeHeader)
{
string header = String.Empty;
String formattedHeader = String.Empty;
foreach (DataColumn clm in table.Columns)
{
if (excludeList.Contains(clm.ColumnName.ToUpper()))
continue;
if (clm.ColumnName.Length > 0)
{
formattedHeader = String.Empty;
formattedHeader = encloseWith + clm.ColumnName + encloseWith;
if (header.Length > 0)
header = String.Join(delimiter, new string[] { header, formattedHeader });
else
header = formattedHeader;
}
}
sw.Write(header);
}
// handle values in data rows now
Boolean hasEnlosedCharacter = !String.IsNullOrEmpty(encloseWith);
ParallelOptions rowOptions = new ParallelOptions();
rowOptions.MaxDegreeOfParallelism = Environment.ProcessorCount;
Parallel.ForEach(table.Rows.Cast<DataRow>(), rowOptions, row =>
{
char[] rowValue = new char[8192];
Int32 rowValueIndex = 0;
string[] dcc = row.ItemArray.Select(field => field.ToString()).ToArray();
foreach (String dc in dcc)
{
if (rowValueIndex > 0)
{
if (!String.IsNullOrEmpty(dc) && hasEnlosedCharacter)
{
rowValue[rowValueIndex++] = delimiter[0];
rowValue[rowValueIndex++] = encloseWith[0];
foreach (char c in dc)
{
rowValue[rowValueIndex++] = c;
}
rowValue[rowValueIndex++] = encloseWith[0];
}
else
{
rowValue[rowValueIndex++] = delimiter[0];
foreach (char c in dc)
{
rowValue[rowValueIndex++] = c;
}
}
}
else
{
if (!String.IsNullOrEmpty(dc) && hasEnlosedCharacter)
{
rowValue[rowValueIndex++] = encloseWith[0];
foreach (char c in dc)
{
rowValue[rowValueIndex++] = c;
}
rowValue[rowValueIndex++] = encloseWith[0];
}
else
{
foreach (char c in dc)
{
rowValue[rowValueIndex++] = c;
}
}
}
}
rowValue[rowValueIndex++] = '\r';
rowValue[rowValueIndex++] = '\n';
lock (sw)
{
sw.Write(rowValue, 0, rowValueIndex);
}
});
sw.Close();
sw.Dispose();
table.Dispose();
fs.Close();
}
}
I know that I should rename some of the variables and handle the header the same way (I'm not writing headers) so this is really a pure logic question and style answers don't help improve the performance.
The puzzling thing is the network performance. It's only using 1.5% of the bandwidth when it's quickly returning 5 datasets with a few thousand rows? I'm using the latest ODP.Net (Oracle) against an 11g DB. I tried Devarts provider and it completely bombed for me.
Network Performance
The processor load reflects the affect of the Parallel.ForEach over the rows within a datatable, which is a good thing.
Processor Performance

THIS is the fastest I've been able to get it.
Retrieving the data:
public static DataTable GetData(String extractToRun, DateTime startDate, DateTime endDate)
{
//RefCursor
OracleParameter refCursorOracleParameter = new OracleParameter
{
ParameterName = "pCursor",
Direction = ParameterDirection.Output,
OracleDbType = OracleDbType.RefCursor
};
OracleParameter startDateOracleParameter = new OracleParameter
{
ParameterName = "pStartDate",
Direction = ParameterDirection.Input,
OracleDbType = OracleDbType.Varchar2,
Value = startDate
};
OracleParameter endDateOracleParameter = new OracleParameter
{
ParameterName = "pEndDate",
Direction = ParameterDirection.Input,
OracleDbType = OracleDbType.Varchar2,
Value = endDate
};
OracleParameter jobIdOracleParameter = new OracleParameter
{
ParameterName = "pJobId",
Direction = ParameterDirection.Input,
Value = "123456"
};
using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
{
oracleConnection.Open();
try
{
using (var oracleCommand = new OracleCommand(extractToRun, oracleConnection))
{
oracleCommand.CommandType = CommandType.StoredProcedure;
oracleCommand.BindByName = true;
oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
oracleCommand.InitialLONGFetchSize = 5000;
oracleCommand.Parameters.Add(refCursorOracleParameter);
oracleCommand.Parameters.Add(startDateOracleParameter);
oracleCommand.Parameters.Add(endDateOracleParameter);
oracleCommand.Parameters.Add(jobIdOracleParameter);
using (OracleDataReader rdr = oracleCommand.ExecuteReader())
{
rdr.FetchSize = rdr.RowSize * 65536;
DataTable dt = new DataTable();
dt.MinimumCapacity = 400000;
dt.BeginLoadData();
dt.Load(rdr, LoadOption.Upsert);
dt.EndLoadData();
rdr.Close();
rdr.Dispose();
oracleCommand.Dispose();
return dt;
}
}
}
finally
{
oracleConnection.Close();
oracleConnection.Dispose();
}
}
}
Processing the data:
public static void ExportDataTableToDelimitedFile(DataTable table, string filename, string encloseWith, string delimiter, bool includeHeader, string fieldsToExclude, bool fixedLengthValues)
{
String excludeList = String.Empty;
if (!String.IsNullOrEmpty(fieldsToExclude))
{
excludeList = fieldsToExclude.ToUpper();
}
using (FileStream fs = new FileStream(filename, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, 2097152, FileOptions.None))
{
BinaryWriter sw = new BinaryWriter(fs);
if (table.Rows.Count == 0)
{
sw.Write(String.Empty);
sw.Close();
sw.Dispose();
return;
}
//Handle header
if (includeHeader)
{
string header = String.Empty;
String formattedHeader = String.Empty;
foreach (DataColumn clm in table.Columns)
{
if (excludeList.Contains(clm.ColumnName.ToUpper()))
continue;
if (clm.ColumnName.Length > 0)
{
formattedHeader = String.Empty;
formattedHeader = encloseWith + clm.ColumnName + encloseWith;
if (header.Length > 0)
header = String.Join(delimiter, new string[] { header, formattedHeader });
else
header = formattedHeader;
}
}
sw.Write(header);
}
// handle values in data rows now
Boolean hasEnlosedCharacter = !String.IsNullOrEmpty(encloseWith);
Parallel.ForEach(table.Rows.Cast<DataRow>(), row =>
{
char[] rowValue = new char[8192];
Int32 rowValueIndex = 0;
char[][] rowData = row.ItemArray.Select(field => field.ToString().ToCharArray()).ToArray();
for (int i = 0; i < rowData.Length; i++)
{
Boolean useEnclosed = rowData[i].Length > 0 && hasEnlosedCharacter;
if (rowValueIndex > 0)
{
if (useEnclosed)
{
rowValue[rowValueIndex++] = delimiter[0];
rowValue[rowValueIndex++] = encloseWith[0];
rowData[i].CopyTo(rowValue, rowValueIndex);
rowValueIndex += rowData[i].Length;
rowValue[rowValueIndex++] = encloseWith[0];
}
else
{
rowValue[rowValueIndex++] = delimiter[0];
rowData[i].CopyTo(rowValue, rowValueIndex);
rowValueIndex += rowData[i].Length;
}
}
else
{
if (useEnclosed)
{
rowValue[rowValueIndex++] = encloseWith[0];
rowData[i].CopyTo(rowValue, rowValueIndex);
rowValueIndex += rowData[i].Length;
rowValue[rowValueIndex++] = encloseWith[0];
}
else
{
rowData[i].CopyTo(rowValue, rowValueIndex);
rowValueIndex += rowData[i].Length;
}
}
}
rowValue[rowValueIndex++] = '\r';
rowValue[rowValueIndex++] = '\n';
lock (sw)
{
sw.Write(rowValue, 0, rowValueIndex);
}
});
sw.Close();
sw.Dispose();
table.Dispose();
fs.Close();
}
}
There are several key points of note. DataReader into a DataTable using Load is 40% faster than Dataset.Fill BUT don't set the fetchsize above 64K. Performance degrades after that. 32K is probably about the best. Character arrays are MUCH faster than even StringBuilder. C# is crippled that we can't have Assembler subroutines, in my humble opinion. I've considered writing a C++ dll just so I could have an Assembly language subroutine to copy memory around. Then I wouldn't need to call ToCharArray(). Granted, I haven't looked at the IL to see exactly what ToCharArray() does but the performance analyzer points at that line of code as taking 26% of the time.
Surprisingly, these changes increase network utilization up to 4.5% (which is high for a single PC on a corporate network) AND it reduces the CPU utilization to around 80% because it's mainly now waiting on the disk write method instead of being busy copying strings around.
I didn't show the original code but it used to take 13-15 minutes to export the data to a pipe delimited file. With these changes it takes 40-45 seconds to export exactly the same data.
I also didn't show that the original DB query had seven queries in it all union all together. I broke those up so I could run them in parallel. Performance fixes need to be addressed as a whole. Many who tried to solve this problem before focused on the DB. Nobody really focused on the client side and tried to identify what the real problem is.
Hopefully this helps someone in the future.

Okay! Here is a BETTER answer!
public static List<ROW_DATA> GetData(String extractToRun, DateTime startDate, DateTime endDate)
{
List<ROW_DATA> dataTable = new List<ROW_DATA>();
//RefCursor
OracleParameter refCursorOracleParameter = new OracleParameter
{
ParameterName = "pCursor",
Direction = ParameterDirection.Output,
OracleDbType = OracleDbType.RefCursor
};
OracleParameter startDateOracleParameter = new OracleParameter
{
ParameterName = "pStartDate",
Direction = ParameterDirection.Input,
OracleDbType = OracleDbType.Varchar2,
Value = startDate
};
OracleParameter endDateOracleParameter = new OracleParameter
{
ParameterName = "pEndDate",
Direction = ParameterDirection.Input,
OracleDbType = OracleDbType.Varchar2,
Value = endDate
};
OracleParameter jobIdOracleParameter = new OracleParameter
{
ParameterName = "pJobId",
Direction = ParameterDirection.Input,
Value = "123456"
};
using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
{
oracleConnection.Open();
try
{
using (var oracleCommand = new OracleCommand(extractToRun, oracleConnection))
{
oracleCommand.CommandType = CommandType.StoredProcedure;
oracleCommand.BindByName = true;
oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
oracleCommand.InitialLONGFetchSize = 5000;
oracleCommand.Parameters.Add(refCursorOracleParameter);
oracleCommand.Parameters.Add(startDateOracleParameter);
oracleCommand.Parameters.Add(endDateOracleParameter);
oracleCommand.Parameters.Add(jobIdOracleParameter);
using (OracleDataReader rdr = oracleCommand.ExecuteReader())
{
//byte[] columnBytes = new byte[16384];
Int32 tryCount = 0;
rdr.FetchSize = rdr.RowSize * 262144;
while (rdr.Read())
{
Int32 charLength = (Int32)rdr.GetChars(0, 0, null, 0, 0);
char[] colChars = new char[charLength];
rdr.GetChars(0, 0, colChars, 0, charLength);
//OracleString colValue = rdr.GetOracleString(0);
//int valueLength = colValue.Length;
//unsafe
//{
// fixed (char* pcolValue = colValue.Value)
// {
// fixed (byte* pcolBytes = columnBytes)
// {
// for (int i = 0; i < valueLength; i++)
// {
// pcolBytes[i] = (byte)pcolValue[i];
// }
// }
// }
//}
ROW_DATA rowData = new ROW_DATA { length = charLength, rowValues = colChars };
dataTable.Add(rowData);
}
}
rdr.Close();
rdr.Dispose();
oracleCommand.Dispose();
return dataTable;
}
}
}
finally
{
oracleConnection.Close();
oracleConnection.Dispose();
}
}
}
I purposely left in the commented out code to show I even tried unsafe code to get the data into the format I needed. Turns out, GetChars returns it just the way I want it so I can simply stream it to disk. I'm up to 11% network utilization and 27 seconds to retrieve 413K rows and write them to disk. I also modified the stored procedure to return a pipe delimited string so I only receive one column of data at the client. It's really fast but I have ideas to cut the time in half. Stay tuned.

Related

C# string array.length error when calling from a CSV file

I'm fairly new to C# and im writing a rental vehicle management system. I'm trying to retrieve all lines from a CSV file that is set up like this:
[Registration][Grade][Make][Model][Year][NumSeats][Transmission][Fuel][GPS][SunRoof][DailyRate][Colour]
[123ABC][Economy][Toyota][Camry][2005][5][Automatic][Petrol][No][No][30][White]
[234BCD][Economy][Ford][Focus][2012][5][Automatic][Petrol][Yes][No][45][Blue]
[987ZYX][Economy][Holden][Cruise][2016][5][Manual][Diesel][Yes][No][60][Red]
and then iterate it through a for loop before it's sent to another method.
In the following method beyond the one shown, it's being put into an ArrayList so that the values retrieved can be searched for by the user in the program.
I'm stuck on the for loop as it gives me an error on the vehicles1.Length; saying that vehicles1 is a use of an unassigned local variable. I don't know if initializing the array is my problem, because I've tried that and it gives me no errors but the program just breaks.
void setUpVehicles(out Fleet fleetVehicles)
{
const char DELIM = ',';
Vehicle veh = new Vehicle();
FileStream inFile = new FileStream(FILENAME3, FileMode.Open, FileAccess.Read);
StreamReader reader = new StreamReader(inFile);
string recordIn;
string[] vehicles1;
recordIn = reader.ReadLine();
while (recordIn != null)
{
string year = veh.Year.ToString();
string seats = veh.NumSeats.ToString();
string gps = veh.GPS.ToString();
string sunRoof = veh.SunRoof.ToString();
string dailyRate = veh.DailyRate.ToString();
vehicles1 = recordIn.Split(DELIM);
veh.Registration = vehicles1[0];
veh.Grade = vehicles1[1];
veh.Make = vehicles1[2];
veh.Model = vehicles1[3];
year = vehicles1[4];
seats = vehicles1[5];
veh.Transmission = vehicles1[6];
veh.Fuel = vehicles1[7];
gps = vehicles1[8];
sunRoof = vehicles1[9];
dailyRate = vehicles1[10];
veh.Colour = vehicles1[11];
}
fleetVehicles = new Fleet();
for (int i = 0; i < vehicles1.Length; i++)
{
fleetVehicles.insertVehicle(vehicles1[i]);
}
}
IEnumerable<Vehicle> setUpVehicles(string fileName)
{
using(var reader = new StreamReader(fileName))
using(var parser = new Microsoft.VisualBasic.TextFieldParser(reader))
{
parser.TextFieldType = Microsoft.VisualBasic.FileIO.FieldType.Delimited;
parser.Delimiters = new string[] {","};
string[] row;
while(!parser.EndOfData)
{
row = parser.ReadFields();
var vehicle = new Vehicle {
Registration = row[0],
Grade = row[1],
Make = row[2],
Model = row[3],
Year = row[4],
NumSeats = row[5],
Transmission = row[6],
Fuel = row[7],
GPS = row[8],
SunRoo = row[9],
DailyRate = row[10],
Colour = row[11]
};
yield return vehicle;
}
}
}
Then you would call it to make a fleet like this:
var fleetVehicles = new Fleet();
foreach(var vehicle in setUpVehicles(FILENAME3))
{
feetVehicles.insertVehicles(vehicle);
}

Why inserting data from DataTable to Database not in order?

I'm reading a CSV file with MemoryStream to insert all it's data in the datatable.
Why inserted data in the database is disordered compare to my DataTable in code?
this is sample code:
using (Stream stream = new MemoryStream(bytes))
{
using (TextFieldParser reader2 = new TextFieldParser(stream, Encoding.GetEncoding(1256)))
{
string lineString = "";
var firstLine = reader2.ReadLine();
lineString = reader2.ReadToEnd();
var largeText = lineString.Split(this.RecordDelimiter);
foreach (var item in largeText)
{
if (!string.IsNullOrEmpty(item))
{
DataRow row = dt.NewRow();
row[0] = _databaseRepository.GetSequencerNextVal("cms_batchfile_records_seq");
row[1] = Convert.ToInt32(requestId);
row[2] = DateAndTime.GetServerDate();
row[3] = DateAndTime.GetServerTime();
row[4] = null;
row[5] = null;
row[6] = Convert.ToInt32(formatId);
row[7] = (int)BatchFileRecords_ProcessStatusEnum.NotProcess;
row[8] = null;
row[9] = item;
dt.Rows.Add(row);
_log.Info("Service"+item);
}
}
}
}
_batchFileRecordsRepository.InsertDatas(dt);
public void InsertDatas(DataTable dt)
{
try
{
BatchFileRecords item;
for (int i = 0; i < dt.Rows.Count; i++)
{
item = new BatchFileRecords()
{
Record_id = Convert.ToInt32(dt.Rows[i].ItemArray[0]),
Request_id = Convert.ToInt32(dt.Rows[i].ItemArray[1]),
Record_date = dt.Rows[i].ItemArray[2].ToString(),
Record_time = dt.Rows[i].ItemArray[3].ToString(),
Process_date = dt.Rows[i].ItemArray[4].ToString(),
Process_time = dt.Rows[i].ItemArray[5].ToString(),
Dataformat_id = Convert.ToInt32(dt.Rows[i].ItemArray[6]),
Processstatus = Convert.ToInt32(dt.Rows[i].ItemArray[7]),
Processresult = dt.Rows[i].ItemArray[8].ToString(),
Data = dt.Rows[i].ItemArray[9].ToString()
};
_unitOfWork.Session.Save(item);
}
_unitOfWork.Commit();
}
catch (Exception ex)
{
throw;
}
}
for example my file is :
2506737718,WFP,t,t,13830811,241641941709,9190517705,,F,,,14160
2506156741,WFP,t,t,13840915,409742096044,9195690240,,F,,,14160
2506158590,WFP,t,t,13840318,038664583449,9182550182,,F,,,14160
in db is:
2506737718,WFP,t,t,13830811,241641941709,9190517705,,F,,,14160
2506158590,WFP,t,t,13840318,038664583449,9182550182,,F,,,14160
2506156741,WFP,t,t,13840915,409742096044,9195690240,,F,,,14160
primary key in db is Record_id that are 2506737718, 2506158590,2506156741.

How to set column Properties in SQL Server

I have to create a Copy of a Database on SQL Server.
On this way I got a connection to the new DB
ADODB.Connection connection = new ADODB.Connection();
OleDbConnectionStringBuilder builder = new System.Data.OleDb.OleDbConnectionStringBuilder();
builder["Provider"] = provider;
builder["Server"] = #"Themis\DEV";
builder["Database"] = file_name;
builder["Integrated Security"] = "SSPI";
string connection_string = builder.ConnectionString;
connection.Open(connection_string, null, null, 0);
return connection;
}
I create the tables with ADOX
ADOX.Catalog cat, Dictionary<string, ADOX.DataTypeEnum> columntype)
{
List<string> primaryKeysList = GetPrimaryKey(tabelle);
Key priKey = new Key();
Catalog catIn = new Catalog();
catIn.ActiveConnection = dbInfo.ConIn;
Dictionary<string, List<string>> indexinfo = new Dictionary<string, List<string>>();
GetSecondaryIndex(tabelle, indexinfo);
if (columntype.Count != 0) columntype.Clear();
if (size.Count != 0) size.Clear();
foreach (DataRow myField in schemaTable.Rows)
{
String columnNameValue = myField[columnName].ToString(); //SpaltenName
bool ich_darf_dbnull_sein = (bool)myField["AllowDBNull"];
ADOX.Column columne = new ADOX.Column();
columne.ParentCatalog = cat;
columne.Name = columnNameValue;
if (!columntype.ContainsKey(columnNameValue))
{
columntype.Add(columnNameValue, (ADOX.DataTypeEnum)myField["ProviderType"]);
}
columne.Type = (ADOX.DataTypeEnum)myField["ProviderType"];
//type.Add((ADODB.DataTypeEnum)myField["ProviderType"]);
columne.DefinedSize = (int)myField["ColumnSize"];
dbInfo.ColumnName = columnNameValue;
dbInfo.TableName = tabelle;
dbInfo.Column_size = (int)myField["ColumnSize"];
dbInfo.Column_Type = (ADOX.DataTypeEnum)myField["ProviderType"];
size.Add((int)myField["ColumnSize"]);
if (primaryKeysList.Contains(columnNameValue))
{
dbInfo.IsPrimary = true;
}
else dbInfo.IsPrimary = false;
object index = catIn.Tables[tabelle].Columns[columnNameValue].Attributes;
if (index.Equals(ColumnAttributesEnum.adColFixed) || (int)index == 3)
dbInfo.Fixed_length = true;
else
dbInfo.Fixed_length = false;
Console.WriteLine("{0}={1}", myField[columnName].ToString(), catIn.Tables[tabelle].Columns[columnNameValue].Attributes);
TargetDBMS.SetColumnProperties(columne, dbInfo);
switch (columne.Type)
{
case ADOX.DataTypeEnum.adChar:
case ADOX.DataTypeEnum.adWChar:
case ADOX.DataTypeEnum.adVarChar:
case ADOX.DataTypeEnum.adVarWChar:
columne.DefinedSize = (int)myField["ColumnSize"];
break;
default:
break;
}
if (primaryKeysList.Contains(columnNameValue))
{
priKey.Name = "PK_" + tabelle + "_" + columnNameValue;
primaryKeysList.Remove(columnNameValue);
priKey.Columns.Append(myField[columnName], (ADOX.DataTypeEnum)myField["ProviderType"], (int)myField["ColumnSize"]);
}
columnNameList.Add(columnNameValue);
table.Columns.Append(columne);
}
table.Keys.Append((object)priKey, KeyTypeEnum.adKeyPrimary);
}
But when I set the Properties for the columns I got an Exception
internal override void SetColumnProperties(ADOX.Column columne, DbInfo dbInfo)
{
GetColumnProperties(dbInfo);
columne.Properties["Autoincrement"].Value = dbInfo.Field_prop["Autoincrement"];
columne.Properties["Default"].Value = dbInfo.Field_prop["Default"];
columne.Properties["Nullable"].Value = dbInfo.Field_prop["Nullable"];
}
My Program works well for Access DB, but I cannot set it for the DB on SQL Server
Exception (0x80040E21) Multiple-step OLE DB operation generated errors. Check each OLE DB status value, if available. No work was done.
If I try this way
string query = "SELECT * FROM Forms";
DataTable dt = new DataTable();
using (SqlConnection sqlConn = Connection())
using (SqlCommand cmd = new SqlCommand(query, sqlConn))
{
sqlConn.Open();
dt.Load(cmd.ExecuteReader());
}
foreach (DataColumn col in dt.Columns)
{
Console.WriteLine(col.ColumnName);
col.AllowDBNull = true;
dt.AcceptChanges();
col.AutoIncrement = false;
dt.AcceptChanges();
}
it does not change the properties in the DB
The Problem is partially solved
columne.Properties["Autoincrement"].Value = (bool)dbInfo.Autoincrement;
because the dbInfo.Autoincrement was an object I have to write (bool) dbInfo.Autoincrement
Not solved is this
columne.Properties["Default"].Value = (string)dbInfo.Default_Value;
because the type of a value Default_Value can be 0, empty ("") or "-"...I don’t know what i can do in this case

Code performance (bulk copy/ reading multiple lines and saving in database from text file)

I am learning c# programming these days and need some help in determining the performance of code.
I have to read a file and some details from it.
File has 4 columns:
ID, dob, size, accountno.
Problem:I have to read every line and insert them into a database and there are more than 50000 entries per day.
Solution I tried:
Created a class with 4 properties (ID, dob, size, accountno.) and then I iterate through the file and convert all the data into objects and keep on adding them on ArraList. So, basically now I got an arraylist with 50000 objects.
Now, I iterate through the array at last and inserted the detail in database.
Is this correct approach ?
Experts please help.
code :
namespace testing
{
class Program
{
static void Main(string[] args)
{
string timestamp = DateTime.Now.ToString("yyyyMMddHHmmss");
string InputDirectory = #"My Documents\\2015";
string FileMask = "comb*.txt";
ArrayList al = new ArrayList();
string line;
var Files = Directory.GetFiles(InputDirectory, FileMask, SearchOption.AllDirectories).Select(f => Path.GetFullPath(f));
foreach (var f in Files)
{
using (StreamReader reader = new StreamReader(f))
{
string date;
while ((line = reader.ReadLine()) != null)
{
Datamodel dm = new Datamodel();
string[] values = line.Split(',').Select(sValue => sValue.Trim()).ToArray();
dm.ID = values[0].ToString();
dm.dob= dm.RPT_ID.Remove(0, 4);
dm.size= values[1].ToString();
dm.accountno= values[2].ToString();
al.Add(dm);
}
reader.Close();
}
}
utilityClass.Insert_Entry(al);
}
}
}
For additional SQL performance look into transactions:
connection.BeginTransaction();
//bulk insert commands here
connection.Commit();
My Solution :Thanks to all above comments.
namespace Test
{
class Program
{
static void Main(string[] args)
{
string timestamp = DateTime.Now.ToString("yyyyMMddHHmmss");
string InputDirectory = #"My Documents\\2015";
string FileMask = "comb*.txt";
try
{
string line = null;
var Files = Directory.GetFiles(InputDirectory, FileMask, SearchOption.AllDirectories).Select(f => Path.GetFullPath(f));
foreach (var f in Files)
{
DataTable table = new DataTable();
table.TableName = f;
table.Columns.Add("ID", typeof(Int64));
table.Columns.Add("dob", typeof(string));
table.Columns.Add("size", typeof(string));
table.Columns.Add("accountno", typeof(string));
using (StreamReader reader = new StreamReader(f))
{
while ((line = reader.ReadLine()) != null)
{
string[] values = line.Split(',').Select(sValue => sValue.Trim()).ToArray();
string uniqueGuid = SequentialGuidGenerator.NewGuid().ToString();
uniqueGuid = uniqueGuid.Replace("-", "");
int ID = convert.toint(values[0]);
string NOTIF_ID = "";
table.Rows.Add(ID,values[1].ToString(),values[2]).toString(),values[2]).toString());
}
reader.Close();
}
utilityClass.Insert_Entry(table, env);
}
}
catch (Exception e)
{
CustomException.Write(CustomException.CreateExceptionString(e));
}
}
}
}
Insert_Entry
using (SqlConnection con = new SqlConnection(utilityClass.GetConnectionString(environ)))
{
con.Open();
using (SqlBulkCopy bulkCopy = new SqlBulkCopy(con))
{
bulkCopy.DestinationTableName = "dbo.inserttablename";
try
{
bulkCopy.WriteToServer(mfsentdata);
}
catch (SqlException e)
{
CustomException.Write(CustomException.CreateExceptionString(e, mfsentdata.TableName));
}
}
con.Close();
}
}

SqlBulkCopy ColumnMapping error CSV to DataTable to SQL Server Table

I am trying to bulk insert a CSV file into a SQL Server database.
The process is .CSV file to DataTable to SqlBulkCopy to SQL Server.
When I run this I get this error:
The given ColumnMapping does not match up with any column in the source or destination
When I directly import the CSV into SQL Server via Management Studio it works! So I think the problem is my data table conversion?
Here is my code to go from .CSV to DataTable:
public DataTable CsvFileToDataTable(string filePath)
{
var csvData = new DataTable("Table1");
using (var csvReader = new TextFieldParser(filePath))
{
csvReader.SetDelimiters(new string[] { "," });
csvReader.HasFieldsEnclosedInQuotes = true;
var readFields = csvReader.ReadFields();
foreach (var dataColumn in readFields.Select(column => new DataColumn(column) {AllowDBNull = true }))
{
csvData.Columns.Add(dataColumn);
}
while (!csvReader.EndOfData)
{
var data = csvReader.ReadFields();
for (var i = 0; i < data.Length; i++)
{
if (data[i] == "")
{
data[i] = null;
}
}
csvData.Rows.Add(data);
}
}
return csvData;
}
And here is the code for the bulk copy insert:
public void InsertData(DataTable table)
{
using (var transactionScope = new TransactionScope())
{
using (var sqlConnection = new SqlConnection(this.ConnectionString))
{
sqlConnection.Open();
using (var sqlBulkCopy = new SqlBulkCopy(sqlConnection))
{
sqlBulkCopy.DestinationTableName = table.TableName;
foreach (var column in table.Columns)
{
sqlBulkCopy.ColumnMappings.Add(column.ToString(), column.ToString());
}
sqlBulkCopy.WriteToServer(table);
}
transactionScope.Complete();
}
}
}
Does anyone have any suggestions?
Thanks
Turns out If I tweked each method
foreach (var dataColumn in readFields.Select(column => new DataColumn(columntypeof(string)) { AllowDBNull = true, DefaultValue = string.Empty }))
{
csvData.Columns.Add(dataColumn);
}
and in the second Method I changed it to use an index rather than column name
for (var count = 0; count < table.Columns.Count; count++)
{
sqlBulkCopy.ColumnMappings.Add(count, count);
}
I was able to use these code samples to create a working solution that reads a csv, checks if there is data, and if there is clean out the data and import all the csv data to the table:
private static void ProcessFile(string FilePath, string TableName)
{
var dt = GetDataTable(FilePath, TableName);
if (dt == null)
{
return;
}
if (dt.Rows.Count == 0)
{
AuditLog.AddInfo("No rows imported after reading file " + FilePath);
return;
}
ClearData(TableName);
InsertData(dt);
}
private static DataTable GetDataTable(string FilePath, string TableName)
{
var dt = new DataTable(TableName);
using (var csvReader = new TextFieldParser(FilePath))
{
csvReader.SetDelimiters(new string[] { "," });
csvReader.HasFieldsEnclosedInQuotes = true;
var readFields = csvReader.ReadFields();
if (readFields == null)
{
AuditLog.AddInfo("Could not read header fields for file " + FilePath);
return null;
}
foreach (var dataColumn in readFields.Select(column => new DataColumn(column, typeof(string)) { AllowDBNull = true, DefaultValue = string.Empty }))
{
dt.Columns.Add(dataColumn);
}
while (!csvReader.EndOfData)
{
var data = csvReader.ReadFields();
if (data == null)
{
AuditLog.AddInfo(string.Format("Could not read fields on line {0} for file {1}", csvReader.LineNumber, FilePath));
continue;
}
var dr = dt.NewRow();
for (var i = 0; i < data.Length; i++)
{
if (!string.IsNullOrEmpty(data[i]))
{
dr[i] = data[i];
}
}
dt.Rows.Add(dr);
}
}
return dt;
}
private static void ClearData(string TableName)
{
SqlHelper.ExecuteNonQuery(ConfigurationUtil.ConnectionString, CommandType.Text, "TRUNCATE TABLE " + TableName);
}
private static void InsertData(DataTable table)
{
using (var sqlBulkCopy = new SqlBulkCopy(ConfigurationUtil.ConnectionString))
{
sqlBulkCopy.DestinationTableName = table.TableName;
for (var count = 0; count < table.Columns.Count; count++)
{
sqlBulkCopy.ColumnMappings.Add(count, count);
}
sqlBulkCopy.WriteToServer(table);
}
}

Categories

Resources