I have to create a copy of a database on SQL Server.
This is how I get a connection to the new DB:
ADODB.Connection connection = new ADODB.Connection();
OleDbConnectionStringBuilder builder = new System.Data.OleDb.OleDbConnectionStringBuilder();
builder["Provider"] = provider;
builder["Server"] = @"Themis\DEV";
builder["Database"] = file_name;
builder["Integrated Security"] = "SSPI";
string connection_string = builder.ConnectionString;
connection.Open(connection_string, null, null, 0);
return connection;
}
I create the tables with ADOX:
ADOX.Catalog cat, Dictionary<string, ADOX.DataTypeEnum> columntype)
{
List<string> primaryKeysList = GetPrimaryKey(tabelle);
Key priKey = new Key();
Catalog catIn = new Catalog();
catIn.ActiveConnection = dbInfo.ConIn;
Dictionary<string, List<string>> indexinfo = new Dictionary<string, List<string>>();
GetSecondaryIndex(tabelle, indexinfo);
if (columntype.Count != 0) columntype.Clear();
if (size.Count != 0) size.Clear();
foreach (DataRow myField in schemaTable.Rows)
{
String columnNameValue = myField[columnName].ToString(); // column name
bool ich_darf_dbnull_sein = (bool)myField["AllowDBNull"];
ADOX.Column columne = new ADOX.Column();
columne.ParentCatalog = cat;
columne.Name = columnNameValue;
if (!columntype.ContainsKey(columnNameValue))
{
columntype.Add(columnNameValue, (ADOX.DataTypeEnum)myField["ProviderType"]);
}
columne.Type = (ADOX.DataTypeEnum)myField["ProviderType"];
//type.Add((ADODB.DataTypeEnum)myField["ProviderType"]);
columne.DefinedSize = (int)myField["ColumnSize"];
dbInfo.ColumnName = columnNameValue;
dbInfo.TableName = tabelle;
dbInfo.Column_size = (int)myField["ColumnSize"];
dbInfo.Column_Type = (ADOX.DataTypeEnum)myField["ProviderType"];
size.Add((int)myField["ColumnSize"]);
if (primaryKeysList.Contains(columnNameValue))
{
dbInfo.IsPrimary = true;
}
else dbInfo.IsPrimary = false;
object index = catIn.Tables[tabelle].Columns[columnNameValue].Attributes;
if (index.Equals(ColumnAttributesEnum.adColFixed) || (int)index == 3)
dbInfo.Fixed_length = true;
else
dbInfo.Fixed_length = false;
Console.WriteLine("{0}={1}", myField[columnName].ToString(), catIn.Tables[tabelle].Columns[columnNameValue].Attributes);
TargetDBMS.SetColumnProperties(columne, dbInfo);
switch (columne.Type)
{
case ADOX.DataTypeEnum.adChar:
case ADOX.DataTypeEnum.adWChar:
case ADOX.DataTypeEnum.adVarChar:
case ADOX.DataTypeEnum.adVarWChar:
columne.DefinedSize = (int)myField["ColumnSize"];
break;
default:
break;
}
if (primaryKeysList.Contains(columnNameValue))
{
priKey.Name = "PK_" + tabelle + "_" + columnNameValue;
primaryKeysList.Remove(columnNameValue);
priKey.Columns.Append(myField[columnName], (ADOX.DataTypeEnum)myField["ProviderType"], (int)myField["ColumnSize"]);
}
columnNameList.Add(columnNameValue);
table.Columns.Append(columne);
}
table.Keys.Append((object)priKey, KeyTypeEnum.adKeyPrimary);
}
But when I set the properties for the columns, I get an exception:
internal override void SetColumnProperties(ADOX.Column columne, DbInfo dbInfo)
{
GetColumnProperties(dbInfo);
columne.Properties["Autoincrement"].Value = dbInfo.Field_prop["Autoincrement"];
columne.Properties["Default"].Value = dbInfo.Field_prop["Default"];
columne.Properties["Nullable"].Value = dbInfo.Field_prop["Nullable"];
}
My program works well for an Access DB, but I cannot set these properties for the DB on SQL Server:
Exception (0x80040E21) Multiple-step OLE DB operation generated errors. Check each OLE DB status value, if available. No work was done.
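One hedged way to narrow down 0x80040E21, which only says that one of several property writes failed, is to set each ADOX property individually and log the one that throws. A sketch, using the same three property names as the method above:
internal static void TrySetProperty(ADOX.Column column, string name, object value)
{
    // Sketch only: isolates which Properties[name] write the SQL Server
    // OLE DB provider rejects, instead of one combined multi-step error.
    try
    {
        column.Properties[name].Value = value;
    }
    catch (System.Runtime.InteropServices.COMException ex)
    {
        Console.WriteLine("Property '{0}' rejected value '{1}': {2}", name, value, ex.Message);
    }
}
Called as TrySetProperty(columne, "Autoincrement", dbInfo.Field_prop["Autoincrement"]), and likewise for "Default" and "Nullable".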
If I try it this way:
string query = "SELECT * FROM Forms";
DataTable dt = new DataTable();
using (SqlConnection sqlConn = Connection())
using (SqlCommand cmd = new SqlCommand(query, sqlConn))
{
sqlConn.Open();
dt.Load(cmd.ExecuteReader());
}
foreach (DataColumn col in dt.Columns)
{
Console.WriteLine(col.ColumnName);
col.AllowDBNull = true;
dt.AcceptChanges();
col.AutoIncrement = false;
dt.AcceptChanges();
}
it does not change the properties in the DB.
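As far as I know, that is expected: AllowDBNull and AutoIncrement on a DataColumn only describe the in-memory DataTable and are never written back to SQL Server. Changing the actual table takes DDL; a minimal sketch, where the column name and type are placeholders rather than anything from the question:
// Hypothetical column: replace "SomeColumn int" with the real name and type.
string ddl = "ALTER TABLE dbo.Forms ALTER COLUMN SomeColumn int NULL";
using (SqlConnection conn = Connection())
using (SqlCommand cmd = new SqlCommand(ddl, conn))
{
    conn.Open();
    cmd.ExecuteNonQuery(); // the schema change happens in the database itself
}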
The problem is partially solved:
columne.Properties["Autoincrement"].Value = (bool)dbInfo.Autoincrement;
because dbInfo.Autoincrement was an object, I have to cast it with (bool)dbInfo.Autoincrement.
Not solved is this:
columne.Properties["Default"].Value = (string)dbInfo.Default_Value;
because the value of Default_Value can be 0, empty ("") or "-"... I don't know what I can do in this case.
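One hedged option for the mixed-type default, assuming it is acceptable to skip the property when no usable default exists: normalize the value through Convert.ToString and only write it when something meaningful remains.
// Sketch: Default_Value may be 0, "" or "-" depending on the source column.
object rawDefault = dbInfo.Default_Value;
if (rawDefault != null && rawDefault != DBNull.Value)
{
    string defaultText = Convert.ToString(rawDefault, System.Globalization.CultureInfo.InvariantCulture);
    if (defaultText.Length > 0)
        columne.Properties["Default"].Value = defaultText;
}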
Related
I've got some code that I have used to pull Google Analytics data with a C# console application, and it works great. Whenever I try to use that same code in an SSIS script task, I get the error "Error deserializing JSON credential data." I get the error both when running locally and when deployed. I've got all the libraries added to the GAC, and I'm using the same library versions and .NET Framework as the console app. Does anyone have any ideas?
public void Main()
{
string SQL_Script = null;
string ErrorMessage = string.Empty;
string ExceptionMessage = "No error";
// Declare the variables that you'll be pulling from Google Analytics into the database
DateTime GA_Session_Date = new DateTime();
DateTime GA_End_Date = new DateTime();
GA_End_Date = DateTime.Today.AddDays(-1);
string GA_TransactionId = null;
string GA_ChannelGrouping = null;
string GA_Source = null;
string GA_Medium = null;
string GA_Keyword = null;
string GA_Campaign = null;
string GA_Device_Category = null;
string GA_Region = null;
int GA_Transactions = 0;
/*
* Get the last SessionDate loaded
*/
GA_Session_Date = Convert.ToDateTime(GetMaxSessionnDate());
GA_Session_Date = GA_Session_Date.AddDays(-1);
/*
* Delete the last SessionDate loaded from the table
*
* The free version of Google Analytics takes up to 24 hours to bake
* so reloading the last day will ensure that we get all of the data.
*/
SQL_Script = "DELETE FROM OmniChannelAnalytics.GoogleAnalytics.Transactions WHERE SessionDate >= '" + GA_Session_Date.ToString() + "';";
ErrorMessage = ExecuteSQL(SQL_Script);
/*
* Create the DataTable and DataSet to house the data from GA until
* it is bulk loaded into SQL
*/
DataSet dataSet = new DataSet();
DataTable sessionTable = new DataTable();
sessionTable.TableName = "Sessions";
// Add the columns to the Sessions table
sessionTable.Columns.Add("SessionDate", typeof(string));
sessionTable.Columns.Add("TransactionId", typeof(string));
sessionTable.Columns.Add("ChannelGrouping", typeof(string));
sessionTable.Columns.Add("Source", typeof(string));
sessionTable.Columns.Add("Medium", typeof(string));
sessionTable.Columns.Add("Keyword", typeof(string));
sessionTable.Columns.Add("Campaign", typeof(string));
sessionTable.Columns.Add("DeviceCategory", typeof(string));
sessionTable.Columns.Add("Region", typeof(string));
sessionTable.Columns.Add("Transactions", typeof(int));
sessionTable.Columns.Add("LoadDate", typeof(string));
dataSet.Tables.Add(sessionTable);
while (GA_Session_Date <= GA_End_Date)
{
try
{
var credential = Google.Apis.Auth.OAuth2.GoogleCredential.FromFile(GlobalVariables.GA_ClientSecretFileLocation)
.CreateScoped(new[] { Google.Apis.AnalyticsReporting.v4.AnalyticsReportingService.Scope.AnalyticsReadonly });
using (var analytics = new Google.Apis.AnalyticsReporting.v4.AnalyticsReportingService(new Google.Apis.Services.BaseClientService.Initializer
{
HttpClientInitializer = credential
}))
{
var request = analytics.Reports.BatchGet(new GetReportsRequest
{
ReportRequests = new[] {
new ReportRequest{
DateRanges = new[] { new DateRange{ StartDate = GA_Session_Date.ToString("yyyy-MM-dd"), EndDate = GA_Session_Date.ToString("yyyy-MM-dd") } },
Dimensions = new[] {
new Dimension{ Name = "ga:transactionId" }
, new Dimension { Name = "ga:channelGrouping" }
, new Dimension { Name = "ga:sourceMedium" }
, new Dimension { Name = "ga:keyword" }
, new Dimension { Name = "ga:campaign" }
, new Dimension { Name = "ga:deviceCategory" }
, new Dimension { Name = "ga:region" }
},
Metrics = new[] { new Metric{ Expression = "ga:transactions", Alias = "Transactions"}},
ViewId = GlobalVariables.GA_View_ID
}
}
});
var response = request.Execute();
foreach (var row in response.Reports[0].Data.Rows)
{
GA_TransactionId = row.Dimensions[0];
GA_ChannelGrouping = row.Dimensions[1];
GA_Source = row.Dimensions[2].Substring(0, row.Dimensions[2].IndexOf("/")).Trim().Replace("'", "''");
GA_Medium = row.Dimensions[2].Substring(row.Dimensions[2].IndexOf("/") + 1, row.Dimensions[2].Length - row.Dimensions[2].IndexOf("/") - 1).Trim().Replace("'", "''");
GA_Keyword = row.Dimensions[3];
GA_Campaign = row.Dimensions[4];
GA_Device_Category = row.Dimensions[5];
GA_Region = row.Dimensions[6];
foreach (var metric in row.Metrics)
{
GA_Transactions = Convert.ToInt32(metric.Values[0]);
}
// Populate the data table to hold until everything is bulk loaded into SQL
DataRow newRow = sessionTable.NewRow();
newRow["SessionDate"] = GA_Session_Date;
newRow["TransactionId"] = GA_TransactionId;
newRow["ChannelGrouping"] = GA_ChannelGrouping;
newRow["Source"] = GA_Source;
newRow["Medium"] = GA_Medium;
newRow["Keyword"] = GA_Keyword;
newRow["Campaign"] = GA_Campaign;
newRow["DeviceCategory"] = GA_Device_Category;
newRow["Region"] = GA_Region;
newRow["Transactions"] = GA_Transactions;
newRow["LoadDate"] = DateTime.Now;
sessionTable.Rows.Add(newRow);
} // foreach (var row in rows)
}
} // try
catch (Exception ex)
{
ExceptionMessage = ex.Message;
}
finally
{
// Import the current day's Session data
foreach (DataTable table in dataSet.Tables)
{
ImportTable(table);
}
sessionTable.Clear();
}
// Iterate the session date to import by 1
GA_Session_Date = GA_Session_Date.AddDays(1);
} // while (GA_Session_Date <= GA_End_Date)
Dts.TaskResult = (int)ScriptResults.Success;
}
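One hedged diagnostic for the "Error deserializing JSON credential data" message, assuming GA_ClientSecretFileLocation points at a service-account JSON file: read the file yourself and hand the raw text to GoogleCredential.FromJson. That separates "SSIS can't reach the file" failures (ReadAllText throws) from "the file content isn't the expected JSON" failures (FromJson throws):
// Sketch only: same path and scope as in the code above.
string credentialPath = GlobalVariables.GA_ClientSecretFileLocation;
string json = System.IO.File.ReadAllText(credentialPath); // throws if the path is unreachable under SSIS
var credential = Google.Apis.Auth.OAuth2.GoogleCredential.FromJson(json)
    .CreateScoped(Google.Apis.AnalyticsReporting.v4.AnalyticsReportingService.Scope.AnalyticsReadonly);
A relative path is a common culprit here, since an SSIS script task usually runs with a different working directory than a console app.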
I'm reading a CSV file with a MemoryStream to insert all its data into a DataTable.
Why is the inserted data in the database out of order compared to my DataTable in the code?
This is a sample of the code:
using (Stream stream = new MemoryStream(bytes))
{
using (TextFieldParser reader2 = new TextFieldParser(stream, Encoding.GetEncoding(1256)))
{
string lineString = "";
var firstLine = reader2.ReadLine();
lineString = reader2.ReadToEnd();
var largeText = lineString.Split(this.RecordDelimiter);
foreach (var item in largeText)
{
if (!string.IsNullOrEmpty(item))
{
DataRow row = dt.NewRow();
row[0] = _databaseRepository.GetSequencerNextVal("cms_batchfile_records_seq");
row[1] = Convert.ToInt32(requestId);
row[2] = DateAndTime.GetServerDate();
row[3] = DateAndTime.GetServerTime();
row[4] = null;
row[5] = null;
row[6] = Convert.ToInt32(formatId);
row[7] = (int)BatchFileRecords_ProcessStatusEnum.NotProcess;
row[8] = null;
row[9] = item;
dt.Rows.Add(row);
_log.Info("Service"+item);
}
}
}
}
_batchFileRecordsRepository.InsertDatas(dt);
public void InsertDatas(DataTable dt)
{
try
{
BatchFileRecords item;
for (int i = 0; i < dt.Rows.Count; i++)
{
item = new BatchFileRecords()
{
Record_id = Convert.ToInt32(dt.Rows[i].ItemArray[0]),
Request_id = Convert.ToInt32(dt.Rows[i].ItemArray[1]),
Record_date = dt.Rows[i].ItemArray[2].ToString(),
Record_time = dt.Rows[i].ItemArray[3].ToString(),
Process_date = dt.Rows[i].ItemArray[4].ToString(),
Process_time = dt.Rows[i].ItemArray[5].ToString(),
Dataformat_id = Convert.ToInt32(dt.Rows[i].ItemArray[6]),
Processstatus = Convert.ToInt32(dt.Rows[i].ItemArray[7]),
Processresult = dt.Rows[i].ItemArray[8].ToString(),
Data = dt.Rows[i].ItemArray[9].ToString()
};
_unitOfWork.Session.Save(item);
}
_unitOfWork.Commit();
}
catch (Exception ex)
{
throw;
}
}
For example, my file is:
2506737718,WFP,t,t,13830811,241641941709,9190517705,,F,,,14160
2506156741,WFP,t,t,13840915,409742096044,9195690240,,F,,,14160
2506158590,WFP,t,t,13840318,038664583449,9182550182,,F,,,14160
In the DB it is:
2506737718,WFP,t,t,13830811,241641941709,9190517705,,F,,,14160
2506158590,WFP,t,t,13840318,038664583449,9182550182,,F,,,14160
2506156741,WFP,t,t,13840915,409742096044,9195690240,,F,,,14160
The primary key in the DB is Record_id; the values are 2506737718, 2506158590, 2506156741.
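Worth keeping in mind: a SQL table is an unordered set, so a SELECT without ORDER BY may return rows in any order the engine likes, regardless of insertion order. Since Record_id is assigned from the sequence in loop order, a hedged sketch of reading the rows back in insertion order (SqlClient assumed; the table name is guessed from the sequence name cms_batchfile_records_seq):
// Sketch only: ask for the order explicitly instead of relying on it.
string query = "SELECT * FROM cms_batchfile_records ORDER BY record_id";
using (var conn = new SqlConnection(connectionString)) // connectionString assumed
using (var cmd = new SqlCommand(query, conn))
{
    conn.Open();
    var ordered = new DataTable();
    ordered.Load(cmd.ExecuteReader()); // rows now match the insertion order
}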
I am a newbie to .NET code. I can understand the code, but I find it difficult to make changes. I am calling the method "threading" and passing an object parameter. I want the data in that object to fill the DataTable, but I am getting an error at that point. I am pasting all the code below; please help.
This is the error message I am getting:
"Object is not an ADODB.RecordSet or an ADODB.Record.\r\nParameter name: adodb"
The error is at the line oleDA1.Fill(dt1, don);
public class Report
{
public string ScheduleId;
public string reportName;
public string Frequency;
public string Customer;
public string Code;
public string ReportPath;
public string ReportId;
public string ReportFormat;
public string StartDate;
public string EndDate;
}
public List<Report> Populatereport(object SSISreport)
{
List<Report> list = new List<Report>();
Report al = null;
bool fireAgain = true;
using (OleDbDataAdapter oleDA = new OleDbDataAdapter())
using (DataTable dt = new DataTable())
{
oleDA.Fill(dt, SSISreport);
foreach (DataRow _row in dt.Rows)
{
al = new Report();
al.reportName = _row["ReportName"].ToString();
al.ScheduleId = _row["ScheduleId"].ToString();
al.Frequency = _row["Frequency"].ToString();
al.Customer = _row["Customer"].ToString();
al.Code = _row["code"].ToString();
al.ReportId = _row["ReportId"].ToString();
al.ReportFormat = _row["ReportFormat"].ToString();
al.ReportPath = _row["ReportPath"].ToString();
al.StartDate = _row["StartDate"].ToString();
al.EndDate = _row["EndDate"].ToString();
list.Add(al);
}
}
return list;
}
private object threading(object don)
{
Report aa = new Report();
DataRow row1;
ReportEnv env = null;
using (OleDbDataAdapter oleDA1 = new OleDbDataAdapter())
using (DataTable dt1 = new DataTable())
{
oleDA1.Fill(dt1, don); // <-- error at this point
row1 = dt1.Rows[0];
aa.reportName = row1["ReportName"].ToString();
aa.ScheduleId = row1["ScheduleId"].ToString();
aa.Frequency = row1["Frequency"].ToString();
aa.Customer = row1["Customer"].ToString();
aa.Code = row1["code"].ToString();
aa.ReportId = row1["ReportId"].ToString();
aa.ReportFormat = row1["ReportFormat"].ToString();
aa.ReportPath = row1["ReportPath"].ToString();
aa.StartDate = row1["StartDate"].ToString();
aa.EndDate = row1["EndDate"].ToString();
}
ParameterValue[] paramval = new ParameterValue[5];
paramval[0] = new ParameterValue();
paramval[0].Name = "Startdate";
paramval[0].Value = aa.StartDate;
paramval[1] = new ParameterValue();
paramval[1].Name = "Enddate";
paramval[1].Value = aa.EndDate;
paramval[2] = new ParameterValue();
paramval[2].Name = "ReportID";
paramval[2].Value = aa.ReportId;
paramval[3] = new ParameterValue();
paramval[3].Name = "Code";
paramval[3].Value = aa.Code;
paramval[4] = new ParameterValue();
paramval[4].Name = "Frequency";
paramval[4].Value = aa.Frequency;
ReportExecutionService rs = new ReportExecutionService();
rs.Credentials = System.Net.CredentialCache.DefaultCredentials;
rs.Url = "some url";
rs.LoadReport(aa.ReportPath, null);
rs.SetExecutionParameters(paramval, "en-GB");
String filename = env.Code + "_" + aa.reportName + DateTime.UtcNow.ToString("_dd-MM-yyyy_hh-mm-ss.fff") + "." + aa.ReportFormat;
//Render the report and generate pdf
Byte[] results;
string encoding = String.Empty;
string mimeType = String.Empty;
string extension = String.Empty;
Warning[] warnings = null;
string[] streamIDs = null;
string deviceInfo = null;
results = rs.Render(aa.ReportFormat, deviceInfo, out extension, out encoding, out mimeType, out warnings, out streamIDs);
using (FileStream stream = File.OpenWrite(path+ filename))
{
stream.Write(results, 0, results.Length);
}
return null;
}
public void Main()
{
List<Report> aq = new List<Report>();
aq = Populatereport(Dts.Variables["vnSource_SQL_Result"].Value);
for (int i = 0; i < aq.Count; i++)
{
threading(aq[i]);
}
}
Check your Report class. I guess the object passed to Fill should be castable to a Recordset or Record, according to MSDN: https://msdn.microsoft.com/en-us/library/5s322715(v=vs.110).aspx
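If the goal is just to reuse the per-report data, a hedged sketch of the fix: Main() passes a Report into threading, and Fill(DataTable, object) only accepts an ADODB Recordset or Record, so skip the second Fill entirely and cast the parameter instead.
private object threading(object don)
{
    // 'don' is the Report already built by Populatereport; no adapter needed.
    Report aa = (Report)don;
    // ... build paramval from aa.StartDate, aa.EndDate, aa.ReportId,
    //     aa.Code and aa.Frequency exactly as in the method above ...
    return null;
}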
This is the fastest method I've found so far to retrieve a response recordset from an Oracle DB and write it out to a delimited file. Faster would be even better. Please offer suggestions.
Retrieving the result set:
using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
{
oracleConnection.Open();
try
{
using (var oracleCommand = new OracleCommand(extractToRun, oracleConnection))
{
oracleCommand.CommandType = CommandType.StoredProcedure;
oracleCommand.BindByName = true;
oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
oracleCommand.InitialLONGFetchSize = 5000;
oracleCommand.Parameters.Add(refCursorOracleParameter);
oracleCommand.Parameters.Add(startDateOracleParameter);
oracleCommand.Parameters.Add(endDateOracleParameter);
oracleCommand.Parameters.Add(jobIdOracleParameter);
using (var oracleDataAdapter = new OracleDataAdapter(oracleCommand))
{
oracleDataAdapter.Fill(ds);
return ds;
}
}
}
finally
{
oracleConnection.Close();
oracleConnection.Dispose();
}
}
Processing the data and writing it out to the file:
public static void ExportDataTableToDelimitedFile(DataTable table, string filename, string encloseWith, string delimiter, bool includeHeader, string fieldsToExclude, bool fixedLengthValues)
{
String excludeList = String.Empty;
if (!String.IsNullOrEmpty(fieldsToExclude))
{
excludeList = fieldsToExclude.ToUpper();
}
using (FileStream fs = new FileStream(filename, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, 131072, FileOptions.None))
{
BinaryWriter sw = new BinaryWriter(fs);
if (table.Rows.Count == 0)
{
sw.Write(String.Empty);
sw.Close();
sw.Dispose();
return;
}
//Handle header
if (includeHeader)
{
string header = String.Empty;
String formattedHeader = String.Empty;
foreach (DataColumn clm in table.Columns)
{
if (excludeList.Contains(clm.ColumnName.ToUpper()))
continue;
if (clm.ColumnName.Length > 0)
{
formattedHeader = String.Empty;
formattedHeader = encloseWith + clm.ColumnName + encloseWith;
if (header.Length > 0)
header = String.Join(delimiter, new string[] { header, formattedHeader });
else
header = formattedHeader;
}
}
sw.Write(header);
}
// handle values in data rows now
Boolean hasEnlosedCharacter = !String.IsNullOrEmpty(encloseWith);
ParallelOptions rowOptions = new ParallelOptions();
rowOptions.MaxDegreeOfParallelism = Environment.ProcessorCount;
Parallel.ForEach(table.Rows.Cast<DataRow>(), rowOptions, row =>
{
char[] rowValue = new char[8192];
Int32 rowValueIndex = 0;
string[] dcc = row.ItemArray.Select(field => field.ToString()).ToArray();
foreach (String dc in dcc)
{
if (rowValueIndex > 0)
{
if (!String.IsNullOrEmpty(dc) && hasEnlosedCharacter)
{
rowValue[rowValueIndex++] = delimiter[0];
rowValue[rowValueIndex++] = encloseWith[0];
foreach (char c in dc)
{
rowValue[rowValueIndex++] = c;
}
rowValue[rowValueIndex++] = encloseWith[0];
}
else
{
rowValue[rowValueIndex++] = delimiter[0];
foreach (char c in dc)
{
rowValue[rowValueIndex++] = c;
}
}
}
else
{
if (!String.IsNullOrEmpty(dc) && hasEnlosedCharacter)
{
rowValue[rowValueIndex++] = encloseWith[0];
foreach (char c in dc)
{
rowValue[rowValueIndex++] = c;
}
rowValue[rowValueIndex++] = encloseWith[0];
}
else
{
foreach (char c in dc)
{
rowValue[rowValueIndex++] = c;
}
}
}
}
rowValue[rowValueIndex++] = '\r';
rowValue[rowValueIndex++] = '\n';
lock (sw)
{
sw.Write(rowValue, 0, rowValueIndex);
}
});
sw.Close();
sw.Dispose();
table.Dispose();
fs.Close();
}
}
I know that I should rename some of the variables and handle the header the same way (I'm not writing headers), so this is really a pure logic question; style answers don't help improve the performance.
The puzzling thing is the network performance: why is it only using 1.5% of the bandwidth when it's quickly returning 5 datasets with a few thousand rows each? I'm using the latest ODP.NET (Oracle) against an 11g DB. I tried Devart's provider and it completely bombed for me.
[Screenshot: network utilization]
The processor load reflects the effect of the Parallel.ForEach over the rows within a DataTable, which is a good thing.
[Screenshot: processor utilization]
THIS is the fastest I've been able to get it.
Retrieving the data:
public static DataTable GetData(String extractToRun, DateTime startDate, DateTime endDate)
{
//RefCursor
OracleParameter refCursorOracleParameter = new OracleParameter
{
ParameterName = "pCursor",
Direction = ParameterDirection.Output,
OracleDbType = OracleDbType.RefCursor
};
OracleParameter startDateOracleParameter = new OracleParameter
{
ParameterName = "pStartDate",
Direction = ParameterDirection.Input,
OracleDbType = OracleDbType.Varchar2,
Value = startDate
};
OracleParameter endDateOracleParameter = new OracleParameter
{
ParameterName = "pEndDate",
Direction = ParameterDirection.Input,
OracleDbType = OracleDbType.Varchar2,
Value = endDate
};
OracleParameter jobIdOracleParameter = new OracleParameter
{
ParameterName = "pJobId",
Direction = ParameterDirection.Input,
Value = "123456"
};
using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
{
oracleConnection.Open();
try
{
using (var oracleCommand = new OracleCommand(extractToRun, oracleConnection))
{
oracleCommand.CommandType = CommandType.StoredProcedure;
oracleCommand.BindByName = true;
oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
oracleCommand.InitialLONGFetchSize = 5000;
oracleCommand.Parameters.Add(refCursorOracleParameter);
oracleCommand.Parameters.Add(startDateOracleParameter);
oracleCommand.Parameters.Add(endDateOracleParameter);
oracleCommand.Parameters.Add(jobIdOracleParameter);
using (OracleDataReader rdr = oracleCommand.ExecuteReader())
{
rdr.FetchSize = rdr.RowSize * 65536;
DataTable dt = new DataTable();
dt.MinimumCapacity = 400000;
dt.BeginLoadData();
dt.Load(rdr, LoadOption.Upsert);
dt.EndLoadData();
rdr.Close();
rdr.Dispose();
oracleCommand.Dispose();
return dt;
}
}
}
finally
{
oracleConnection.Close();
oracleConnection.Dispose();
}
}
}
Processing the data:
public static void ExportDataTableToDelimitedFile(DataTable table, string filename, string encloseWith, string delimiter, bool includeHeader, string fieldsToExclude, bool fixedLengthValues)
{
String excludeList = String.Empty;
if (!String.IsNullOrEmpty(fieldsToExclude))
{
excludeList = fieldsToExclude.ToUpper();
}
using (FileStream fs = new FileStream(filename, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, 2097152, FileOptions.None))
{
BinaryWriter sw = new BinaryWriter(fs);
if (table.Rows.Count == 0)
{
sw.Write(String.Empty);
sw.Close();
sw.Dispose();
return;
}
//Handle header
if (includeHeader)
{
string header = String.Empty;
String formattedHeader = String.Empty;
foreach (DataColumn clm in table.Columns)
{
if (excludeList.Contains(clm.ColumnName.ToUpper()))
continue;
if (clm.ColumnName.Length > 0)
{
formattedHeader = String.Empty;
formattedHeader = encloseWith + clm.ColumnName + encloseWith;
if (header.Length > 0)
header = String.Join(delimiter, new string[] { header, formattedHeader });
else
header = formattedHeader;
}
}
sw.Write(header);
}
// handle values in data rows now
Boolean hasEnlosedCharacter = !String.IsNullOrEmpty(encloseWith);
Parallel.ForEach(table.Rows.Cast<DataRow>(), row =>
{
char[] rowValue = new char[8192];
Int32 rowValueIndex = 0;
char[][] rowData = row.ItemArray.Select(field => field.ToString().ToCharArray()).ToArray();
for (int i = 0; i < rowData.Length; i++)
{
Boolean useEnclosed = rowData[i].Length > 0 && hasEnlosedCharacter;
if (rowValueIndex > 0)
{
if (useEnclosed)
{
rowValue[rowValueIndex++] = delimiter[0];
rowValue[rowValueIndex++] = encloseWith[0];
rowData[i].CopyTo(rowValue, rowValueIndex);
rowValueIndex += rowData[i].Length;
rowValue[rowValueIndex++] = encloseWith[0];
}
else
{
rowValue[rowValueIndex++] = delimiter[0];
rowData[i].CopyTo(rowValue, rowValueIndex);
rowValueIndex += rowData[i].Length;
}
}
else
{
if (useEnclosed)
{
rowValue[rowValueIndex++] = encloseWith[0];
rowData[i].CopyTo(rowValue, rowValueIndex);
rowValueIndex += rowData[i].Length;
rowValue[rowValueIndex++] = encloseWith[0];
}
else
{
rowData[i].CopyTo(rowValue, rowValueIndex);
rowValueIndex += rowData[i].Length;
}
}
}
rowValue[rowValueIndex++] = '\r';
rowValue[rowValueIndex++] = '\n';
lock (sw)
{
sw.Write(rowValue, 0, rowValueIndex);
}
});
sw.Close();
sw.Dispose();
table.Dispose();
fs.Close();
}
}
There are several key points of note. DataReader into a DataTable using Load is 40% faster than DataSet.Fill, BUT don't set the FetchSize above 64K; performance degrades after that, and 32K is probably about the best. Character arrays are MUCH faster than even StringBuilder. C# is crippled in that we can't have assembler subroutines, in my humble opinion. I've considered writing a C++ DLL just so I could have an assembly-language subroutine to copy memory around; then I wouldn't need to call ToCharArray(). Granted, I haven't looked at the IL to see exactly what ToCharArray() does, but the performance analyzer points at that line of code as taking 26% of the time.
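(As an aside on the ToCharArray() point: a hedged alternative is String.CopyTo, which writes the characters straight into an existing buffer, so the per-field allocation goes away. A sketch of a helper that would slot into the row loop above:)
// Sketch only: copies a field into the shared row buffer without an
// intermediate char[] and returns the advanced write position.
static int AppendField(string field, char[] buffer, int index)
{
    field.CopyTo(0, buffer, index, field.Length);
    return index + field.Length;
}
In the loop, rowValueIndex = AppendField(row[i].ToString(), rowValue, rowValueIndex); would replace both the ToCharArray() in the Select and the CopyTo of rowData[i].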
Surprisingly, the changes above increase network utilization up to 4.5% (which is high for a single PC on a corporate network) AND reduce CPU utilization to around 80%, because the process is now mainly waiting on the disk write instead of being busy copying strings around.
I didn't show the original code, but it used to take 13-15 minutes to export the data to a pipe-delimited file. With these changes it takes 40-45 seconds to export exactly the same data.
I also didn't show that the original DB query had seven queries in it, all UNION ALLed together. I broke those up so I could run them in parallel. Performance fixes need to be addressed as a whole: many who tried to solve this problem before focused on the DB, and nobody really focused on the client side to identify what the real problem is.
Hopefully this helps someone in the future.
Okay! Here is a BETTER answer!
public static List<ROW_DATA> GetData(String extractToRun, DateTime startDate, DateTime endDate)
{
List<ROW_DATA> dataTable = new List<ROW_DATA>();
//RefCursor
OracleParameter refCursorOracleParameter = new OracleParameter
{
ParameterName = "pCursor",
Direction = ParameterDirection.Output,
OracleDbType = OracleDbType.RefCursor
};
OracleParameter startDateOracleParameter = new OracleParameter
{
ParameterName = "pStartDate",
Direction = ParameterDirection.Input,
OracleDbType = OracleDbType.Varchar2,
Value = startDate
};
OracleParameter endDateOracleParameter = new OracleParameter
{
ParameterName = "pEndDate",
Direction = ParameterDirection.Input,
OracleDbType = OracleDbType.Varchar2,
Value = endDate
};
OracleParameter jobIdOracleParameter = new OracleParameter
{
ParameterName = "pJobId",
Direction = ParameterDirection.Input,
Value = "123456"
};
using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
{
oracleConnection.Open();
try
{
using (var oracleCommand = new OracleCommand(extractToRun, oracleConnection))
{
oracleCommand.CommandType = CommandType.StoredProcedure;
oracleCommand.BindByName = true;
oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
oracleCommand.InitialLONGFetchSize = 5000;
oracleCommand.Parameters.Add(refCursorOracleParameter);
oracleCommand.Parameters.Add(startDateOracleParameter);
oracleCommand.Parameters.Add(endDateOracleParameter);
oracleCommand.Parameters.Add(jobIdOracleParameter);
using (OracleDataReader rdr = oracleCommand.ExecuteReader())
{
//byte[] columnBytes = new byte[16384];
Int32 tryCount = 0;
rdr.FetchSize = rdr.RowSize * 262144;
while (rdr.Read())
{
Int32 charLength = (Int32)rdr.GetChars(0, 0, null, 0, 0);
char[] colChars = new char[charLength];
rdr.GetChars(0, 0, colChars, 0, charLength);
//OracleString colValue = rdr.GetOracleString(0);
//int valueLength = colValue.Length;
//unsafe
//{
// fixed (char* pcolValue = colValue.Value)
// {
// fixed (byte* pcolBytes = columnBytes)
// {
// for (int i = 0; i < valueLength; i++)
// {
// pcolBytes[i] = (byte)pcolValue[i];
// }
// }
// }
//}
ROW_DATA rowData = new ROW_DATA { length = charLength, rowValues = colChars };
dataTable.Add(rowData);
}
}
rdr.Close();
rdr.Dispose();
oracleCommand.Dispose();
return dataTable;
}
}
}
finally
{
oracleConnection.Close();
oracleConnection.Dispose();
}
}
}
I purposely left in the commented-out code to show that I even tried unsafe code to get the data into the format I needed. It turns out GetChars returns it just the way I want it, so I can simply stream it to disk. I'm up to 11% network utilization and 27 seconds to retrieve 413K rows and write them to disk. I also modified the stored procedure to return a pipe-delimited string, so I only receive one column of data at the client. It's really fast, but I have ideas to cut the time in half. Stay tuned.
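For completeness, a hedged sketch of the "stream it to disk" step for the List<ROW_DATA> built above, assuming ROW_DATA carries the char[] and its length as in GetData, and that outputPath is supplied by the caller:
// Sketch only: the proc already pipe-delimits each row, so writing is
// just the characters plus a line terminator.
using (var writer = new StreamWriter(outputPath, false, Encoding.ASCII, 1 << 20))
{
    foreach (ROW_DATA row in dataTable)
    {
        writer.Write(row.rowValues, 0, row.length);
        writer.Write("\r\n");
    }
}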
I am currently making an ASP.NET MVC application that lets users upload other users to a database from CSV and Excel files.
While working on the CSV-uploader method, I have come across a problem: the TextFieldParser csvReader only reads the first column name and then tries to add it to every column in the DataTable, which raises a "DuplicateNameException in System.Data.dll".
I have tried to find a counter that guards against this, and I have also changed the delimiter type, which only resulted in everything staying in the first DataTable column instead, so the delimiters are not the problem.
There is a breakpoint, so I could see that the colFields counter manages to find all the columns in the CSV file, yet it only tries to add the first one to the DataTable.
When searching for the problem, the only thing I come across is deliberate code that adds the same column over and over, but I do not think I have anything like that.
The method just stops trying to add the next column and tries to add the same first one instead, and as a result there is the "DuplicateNameException". Can anyone see why that is?
Here is my code; it is placed in a controller:
[HttpPost]
public ActionResult Import(HttpPostedFileBase file)
{
DataTable csvData = new DataTable();
if (file != null && file.ContentLength > 0)
{
try
{
using (TextFieldParser csvReader = new TextFieldParser(file.InputStream))
{
// TODO: Create a list or similar for delimiter choices, if possible.
csvReader.SetDelimiters(new string[] { ";" });
csvReader.HasFieldsEnclosedInQuotes = false;
string[] colFields = csvReader.ReadFields();
foreach (string column in colFields)
{
// TODO: Current error: DuplicateNameException; it puts the author title on two different columns and throws an exception when this is detected.
DataColumn Titel = new DataColumn(column);
Titel.AllowDBNull = true;
csvData.Columns.Add(Titel);
DataColumn FirstName = new DataColumn(column);
FirstName.AllowDBNull = true;
csvData.Columns.Add(FirstName);
DataColumn LastName = new DataColumn(column);
LastName.AllowDBNull = true;
csvData.Columns.Add(LastName);
DataColumn AbstrNum = new DataColumn(column);
AbstrNum.AllowDBNull = true;
csvData.Columns.Add(AbstrNum);
DataColumn PosterTitel = new DataColumn(column);
PosterTitel.AllowDBNull = true;
csvData.Columns.Add(PosterTitel);
DataColumn Workshop = new DataColumn(column);
Workshop.AllowDBNull = true;
csvData.Columns.Add(Workshop);
DataColumn Keywords = new DataColumn(column);
Keywords.AllowDBNull = true;
csvData.Columns.Add(Keywords);
DataColumn Institution = new DataColumn(column);
Institution.AllowDBNull = true;
csvData.Columns.Add(Institution);
DataColumn CollabEmail = new DataColumn(column);
CollabEmail.AllowDBNull = true;
csvData.Columns.Add(CollabEmail);
}
while (!csvReader.EndOfData)
{
string[] fieldData = csvReader.ReadFields();
//Making empty value as null
for (int i = 0; i < fieldData.Length; i++)
{
if (fieldData[i] == "")
{
fieldData[i] = null;
}
}
csvData.Rows.Add(fieldData);
}
}
// Still inside the try...
SqlConnectionStringBuilder cb = new SqlConnectionStringBuilder();
cb.DataSource = @"HELMER\SQLEXPRESS";
cb.InitialCatalog="TestDB";
cb.IntegratedSecurity=true;
SqlConnection cnn = new SqlConnection(cb.ConnectionString);
SqlCommand cmd = new SqlCommand("SELECT * FROM csvData", cnn);
cnn.Open();
SqlDataReader rdr=cmd.ExecuteReader();
SqlBulkCopy sbc= new SqlBulkCopy("server=.;database=TestDB;" + "Integrated Security=SSPI");
sbc.DestinationTableName = "Users";
sbc.WriteToServer(rdr);
sbc.Close();
rdr.Close();
cnn.Close();
}
catch (Exception ex)
{
}
}
return RedirectToAction("Index", "Home", new { Message = "The Import was a success" });
}
I have taken some source material from this site:
http://www.morgantechspace.com/2013/10/import-csv-file-into-sql-server-using.html
I am also still pretty new to this site (it has been about a week), so tell me if there is anything I am doing wrong.
Try this:
foreach (string column in colFields)
{
DataColumn datecolumn = new DataColumn(column);
datecolumn.AllowDBNull = true;
csvData.Columns.Add(datecolumn);
}
Once you have created the columns from colFields, you can add rows to the created columns:
while (!csvReader.EndOfData)
{
string[] fieldData = csvReader.ReadFields();
//Making empty value as null
for (int i = 0; i < fieldData.Length; i++)
{
if (fieldData[i] == "")
{
fieldData[i] = null;
}
}
csvData.Rows.Add(fieldData); // Here rows are added to the created columns
}