Async alternative to CsvDataReader? - c#

I'm trying to use CsvHelper to parse a CSV file and stream it to a SQL database. The CsvDataReader class seems perfect for that, but it doesn't implement any async methods, which seems odd to me, considering that the underlying CsvReader has async versions of all of its reading methods. Is there a way to use CsvDataReader asynchronously?

I've used this before to bulk import CSV files using Entity Framework. It was written in LINQPad, so you might need to make a couple of changes.
async Task Main()
{
    var config = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        MemberTypes = CsvHelper.Configuration.MemberTypes.Fields
    };

    using (var reader = new StreamReader(@"C:\MyBulkFile.csv"))
    using (var csv = new CsvReader(reader, config))
    {
        var records = csv.GetRecords<Foo>();
        await BulkInsertChunks(records);
    }
}
async Task<int> BulkInsertChunks<T>(IEnumerable<T> entities, int chunkAmount = 10000)
{
    var i = 0;
    var j = 0;
    var uploadRecords = new List<T>();

    foreach (var record in entities)
    {
        if (i < chunkAmount - 1)
        {
            i++;
            j++;
            uploadRecords.Add(record);
        }
        else
        {
            // Chunk is full: upload it and start a new one.
            i = 0;
            j++;
            uploadRecords.Add(record);
            Console.WriteLine($"Uploading {j}");
            await BulkInsertAll<T>(uploadRecords);
            uploadRecords.Clear();
        }
    }

    // Upload whatever is left from the final partial chunk.
    Console.WriteLine($"Uploading {j}");
    await BulkInsertAll<T>(uploadRecords);
    return j;
}
async Task BulkInsertAll<T>(List<T> entities)
{
    var connstr = Connection.ConnectionString; // LINQPad's current connection
    using (var conn = new SqlConnection(connstr))
    {
        await conn.OpenAsync();

        Type t = typeof(T);
        var tableAttribute = (TableAttribute)t.GetCustomAttributes(typeof(TableAttribute), false).Single();
        var bulkCopy = new SqlBulkCopy(conn) { DestinationTableName = tableAttribute.Name };

        // Only fields decorated with [Column] take part in the copy.
        var properties = t.GetMembers().Where(p =>
        {
            var columnAttribute = Attribute.GetCustomAttribute(p, typeof(ColumnAttribute)) as ColumnAttribute;
            return columnAttribute != null;
        }).ToArray();

        // Build a DataTable whose columns mirror the [Column] fields.
        var table = new DataTable();
        foreach (var property in properties)
        {
            Type propertyType = ((FieldInfo)property).FieldType;
            if (propertyType.IsGenericType && propertyType.GetGenericTypeDefinition() == typeof(Nullable<>))
            {
                propertyType = Nullable.GetUnderlyingType(propertyType);
            }
            table.Columns.Add(new DataColumn(property.Name, propertyType));
        }

        foreach (var entity in entities)
        {
            var cols = properties.Select(property =>
            {
                var field = (FieldInfo)property;
                var o = field.GetValue(entity);
                return o ?? (object)DBNull.Value;
            }).ToArray();
            table.Rows.Add(cols);
        }

        await bulkCopy.WriteToServerAsync(table);
    }
}

Related

Inserting large amounts of data using SqlBulkCopy and DbGeography data types

I am using SqlBulkCopy to insert 10,000+ records into my DB, but it won't work because of a DbGeography point.
I keep getting this exception:
"Specified type is not registered on the target server.System.Data.Entity.Spatial.DbGeography".
This is my code.
public void AddBulkRange(string tableName, List<TEntity> entities)
{
    using (var context = new TContext())
    {
        context.Configuration.AutoDetectChangesEnabled = false;
        context.Configuration.ValidateOnSaveEnabled = false;
        string conectionString = context.Database.Connection.ConnectionString;
        using (var connection = new SqlConnection(conectionString))
        {
            connection.Open();
            SqlTransaction transaction = connection.BeginTransaction();
            using (var bulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.Default, transaction))
            {
                bulkCopy.BatchSize = 2000;
                bulkCopy.DestinationTableName = "dbo." + tableName;
                try
                {
                    DataTable data = DataReaderConverterHelper.ToDataTable(entities);
                    //This just explicitly maps the columns in sqlBulkCopy to the table columns
                    foreach (DataColumn column in data.Columns)
                    {
                        bulkCopy.ColumnMappings.Add(new SqlBulkCopyColumnMapping(column.ColumnName, column.ColumnName));
                    }
                    bulkCopy.WriteToServer(data);
                }
                catch (Exception e)
                {
                    transaction.Rollback();
                    connection.Close();
                }
            }
            transaction.Commit();
        }
    }
}
This is the ToDataTable Method.
public static DataTable ToDataTable<TEntity>(this IList<TEntity> data)
{
    PropertyDescriptorCollection properties = TypeDescriptor.GetProperties(typeof(TEntity));
    DataTable dt = new DataTable();
    for (int i = 0; i < properties.Count; i++)
    {
        PropertyDescriptor property = properties[i];
        dt.Columns.Add(property.Name, Nullable.GetUnderlyingType(
            property.PropertyType) ?? property.PropertyType);
    }
    object[] values = new object[properties.Count];
    foreach (TEntity item in data)
    {
        for (int i = 0; i < values.Length; i++)
        {
            values[i] = properties[i].GetValue(item);
        }
        dt.Rows.Add(values);
    }
    return dt;
}
The problem is the DbGeography point is throwing the exception.
The field type in my model.
public System.Data.Entity.Spatial.DbGeography geography_point { get; set; }
In the DB the field type is Geography.
I can insert the data if I use the method below, but then I get a timeout error. That is why I want to use SqlBulkCopy, but as I said above, it doesn't work because of that one data type.
public List<TEntity> AddRange(List<TEntity> entities)
{
    int takeCount = 100;
    int counter = (entities.Count % takeCount) == 0
        ? (entities.Count / takeCount)
        : (entities.Count / takeCount) + 1;
    for (int i = 0; i < counter; i++)
    {
        using (var context = new TContext())
        {
            List<TEntity> subList = entities.Skip(i * takeCount).Take(takeCount).ToList();
            context.Configuration.AutoDetectChangesEnabled = false;
            context.Configuration.ValidateOnSaveEnabled = false;
            context.Set<TEntity>().AddRange(subList);
            context.SaveChanges();
            context.Dispose();
        }
    }
    return entities;
}
Thanks to some input from @AlwaysLearning, I made some changes and got DbGeography working with the DataTable.
public static DataTable ToDataTable<TEntity>(this IList<TEntity> data)
{
    PropertyDescriptorCollection properties = TypeDescriptor.GetProperties(typeof(TEntity));
    DataTable dt = new DataTable();
    for (int i = 0; i < properties.Count; i++)
    {
        PropertyDescriptor property = properties[i];
        if (property.PropertyType.Name == "DbGeography")
        {
            // Map DbGeography columns to SqlGeography so SqlBulkCopy can send them.
            dt.Columns.Add(property.Name, typeof(SqlGeography));
            continue;
        }
        dt.Columns.Add(property.Name, Nullable.GetUnderlyingType(
            property.PropertyType) ?? property.PropertyType);
    }
    object[] values = new object[properties.Count];
    foreach (TEntity item in data)
    {
        for (int i = 0; i < values.Length; i++)
        {
            if (properties[i].PropertyType.Name == "DbGeography")
            {
                // Convert the EF DbGeography value to SqlGeography for the bulk copy.
                DbGeography Point = (DbGeography)properties[i].GetValue(item);
                SqlGeography newGeography = SqlGeography.Parse(Point.AsText()).MakeValid();
                values[i] = newGeography;
                continue;
            }
            values[i] = properties[i].GetValue(item);
        }
        dt.Rows.Add(values);
    }
    return dt;
}
All I did was convert the DbGeography data type to the SqlGeography type and it worked perfectly.
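One caveat worth noting: SqlGeography.Parse assumes SRID 4326. If the source data can use a different coordinate system, a small helper along these lines (a sketch, not part of the original fix; it assumes Microsoft.SqlServer.Types and System.Data.SqlTypes are referenced) carries the SRID across from DbGeography:

static SqlGeography ToSqlGeography(DbGeography point)
{
    // STGeomFromText lets us pass the original coordinate system id
    // instead of relying on Parse's default of 4326.
    return SqlGeography
        .STGeomFromText(new SqlChars(point.AsText()), point.CoordinateSystemId)
        .MakeValid();
}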

Import *.xlsx data via ExcelDataReader with 0 number of records affected

I use the ExcelDataReader and ExcelDataReader.DataSet libraries to read over 1700 rows from an xlsx file. In the end, no records are read. However, if I restrict the number of rows in the file to 9, all of the rows are read correctly. Here is the code:
private IEnumerable<T> ParseImportSourceFile<T>(string fileName, byte[] file) where T : CsvClassMap
{
    IEnumerable<T> records = null;
    var extensionEnd = Path.GetExtension(fileName).ToLower();
    switch (Path.GetExtension(fileName).ToLower())
    {
        case ".csv":
            break;
        case ".xls":
        case ".xlsx":
        case ".ods":
            records = new List<T>();
            var memoryStream = new MemoryStream(file);
            var streamReader = new StreamReader(memoryStream);
            var reader = ExcelReaderFactory.CreateReader(streamReader.BaseStream);
            if (extensionEnd == ".xlsx")
            {
                reader = ExcelReaderFactory.CreateOpenXmlReader(streamReader.BaseStream);
            }
            try
            {
                var result = reader.AsDataSet(new ExcelDataSetConfiguration()
                {
                    UseColumnDataType = true,
                    ConfigureDataTable = (tableReader) => new ExcelDataTableConfiguration()
                    {
                        EmptyColumnNamePrefix = "Column",
                        UseHeaderRow = true,
                    }
                });
                if (result.Tables.Count > 0)
                {
                    var tbl = result.Tables[0];
                    //grab column names into array
                    var columns = new DataColumn[tbl.Columns.Count];
                    tbl.Columns.CopyTo(columns, 0);
                    //grab properties of T class
                    var tType = typeof(T);
                    var properties = tType.GetProperties(BindingFlags.Public | BindingFlags.Instance);
                    for (int i = 0; i < tbl.Rows.Count; i++)
                    {
                        //one row one record... create an instance for each row
                        var row = tbl.Rows[i];
                        var record = Activator.CreateInstance<T>();
                        //read all columns and try set valid property
                        foreach (var col in columns)
                        {
                            var prop = properties.FirstOrDefault(x => x.Name == col.ColumnName ||
                                (x.GetCustomAttribute<DisplayAttribute>() != null && x.GetCustomAttribute<DisplayAttribute>().Name == col.ColumnName));
                            if (prop != null && prop.CanWrite)
                            {
                                var sourceValue = row[col.ColumnName];
                                if (!(sourceValue is DBNull))
                                    prop.SetValue(record, Parse(prop.PropertyType, sourceValue.ToString()));
                            }
                        }
                        ((IList<T>)records).Add(record);
                    }
                }
            }
            finally
            {
                if (reader != null) reader.Dispose();
                if (streamReader != null) streamReader.Dispose();
                if (memoryStream != null) memoryStream.Dispose();
            }
            break;
        default:
            throw new ImportException("Template is not valid.");
    }
    return records;
}
I use the CreateOpenXmlReader method for the xlsx file, as was suggested on Stack Overflow.
tbl.Rows.Count is zero, even though there are more than 1700 rows in the xlsx file. Please help.
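One way to narrow this down is to run ExcelDataReader on its own, outside the property-mapping loop, and check how many rows AsDataSet actually returns. A minimal sketch using only the calls already shown above (the file path is a placeholder, and the encoding-provider registration is only needed on .NET Core):

// Does AsDataSet itself see the 1700 rows, or are they lost later?
System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
using (var stream = File.OpenRead(@"C:\temp\sample.xlsx"))
using (var reader = ExcelReaderFactory.CreateReader(stream))
{
    var dataSet = reader.AsDataSet(new ExcelDataSetConfiguration
    {
        ConfigureDataTable = _ => new ExcelDataTableConfiguration { UseHeaderRow = true }
    });
    Console.WriteLine(dataSet.Tables[0].Rows.Count);
}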

How to Implement JSON Facet API in SolrNet

I want to use aggregate functions in Solr. I found the way to do it in this post, but I can't implement it in SolrNet.
ISolrOperations<DeviceReadings> solr = connection.GetSolrInstance();
QueryOptions queryOption = new QueryOptions
{
    Rows = 0,
    FilterQueries = new ISolrQuery[] {
        new SolrQueryByField("playerId", query.PlayerId.ToString()),
        new SolrQueryByRange<DateTime>("dateTime", query.DateTimeFrom, query.DateTimeTo)
    },
    Facet = new FacetParameters
    {
        Queries = new List<ISolrFacetQuery>
        {
            new SolrFacetFieldQuery("heartRate")
        }
    }
};
queryOption.ExtraParams = new KeyValuePair<string, string>[] {
    new KeyValuePair<string,string>("wt", "xml")
};
//Execute the query
solrResults = solr.Query(SolrQuery.All, queryOption);
Update
I did it using ExtraParams
queryOption.ExtraParams = new KeyValuePair<string, string>[] {
    new KeyValuePair<string,string>("wt", "xml"),
    new KeyValuePair<string,string>("json.facet", "{heartRateMin: 'min(heartRate)',heartRateMax: 'max(heartRate)',heartRateAvg: 'avg(heartRate)',distance: 'sum(distance)',calories: 'sum(calories)'}")
};
solrResults = await solr.QueryAsync(SolrQuery.All, queryOption);
ReadingsResponseExtraction extractResponse = new ReadingsResponseExtraction();
extractResponse.SetHeader(queryResponce, solrResults);
extractResponse.SetBody(queryResponce, solrResults);
extractResponse.SetFacets(queryResponce, solrResults);
//Return response;
return queryResponce;
ReadingsResponseExtraction.cs
internal class ReadingsResponseExtraction
{
    //Extract parts of the SolrNet response and set them in QueryResponse class
    internal void SetHeader(DeviceQueryResponse queryResponce, SolrQueryResults<DeviceReadings> solrResults)
    {
        queryResponce.QueryTime = solrResults.Header.QTime;
        queryResponce.Status = solrResults.Header.Status;
        queryResponce.TotalHits = solrResults.NumFound;
    }

    internal void SetBody(DeviceQueryResponse queryResponce, SolrQueryResults<DeviceReadings> solrResults)
    {
        queryResponce.Result = (List<DeviceReadings>)solrResults;
    }

    internal void SetFacets(DeviceQueryResponse queryResponse, SolrQueryResults<DeviceReadings> solrResults)
    {
        queryResponse.HeartRateMin = (int)solrResults.Stats["heartRate"].Min;
        queryResponse.HeartRateMax = (int)solrResults.Stats["heartRate"].Max;
        queryResponse.HeartRateAvg = (int)solrResults.Stats["heartRate"].Mean;
        queryResponse.Distance = solrResults.Stats["distance"].Sum;
        queryResponse.Calories = solrResults.Stats["calories"].Sum;
    }
}
How do I get these values from ExtraParams?
To the best of my knowledge SolrNet does not yet have a .NET API supporting json.facet. However, you can always append extra query parameters via the QueryOptions.ExtraParams property. Based on your example:
queryOption.ExtraParams = new KeyValuePair<string, string>[] {
    new KeyValuePair<string,string>("wt", "xml"),
    new KeyValuePair<string,string>("json.facet", "YOUR_JSON_FACET"),
};
YOUR_JSON_FACET can either just be a JSON string literal, or an object that is serialized into JSON. e.g.
var jsonFacet = new
{
    heartRate = new
    {
        type = "terms",
        field = "heartRate",
    }
};
JsonConvert.SerializeObject(jsonFacet, Formatting.None);
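For the aggregate functions from the update above, the same approach might look like this (a sketch; it produces a JSON object equivalent to the json.facet string literal already shown):

var jsonFacet = new
{
    heartRateMin = "min(heartRate)",
    heartRateMax = "max(heartRate)",
    heartRateAvg = "avg(heartRate)",
    distance = "sum(distance)",
    calories = "sum(calories)"
};
var facetJson = JsonConvert.SerializeObject(jsonFacet, Formatting.None);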
Next, you need to read the facet values out of the response from Solr. There are likely cleaner ways to do this, but one way that doesn't involve altering SolrNet internals is to write your own Query method that also outputs the raw XML. From that raw XML, you can just read the appropriate json.facet nodes.
public static SolrQueryResults<T> QueryWithRawXml<T>(this ISolrOperations<T> operations,
    ISolrQuery query, QueryOptions queryOptions, out XDocument xml)
{
    var executor = (SolrQueryExecuter<T>)ServiceLocator.Current.GetInstance<ISolrQueryExecuter<T>>();
    var connectionKey = string.Format("{0}.{1}.{2}", typeof(SolrConnection), typeof(T), typeof(SolrConnection));
    var connection = ServiceLocator.Current.GetInstance<ISolrConnection>(connectionKey);
    var parser = ServiceLocator.Current.GetInstance<ISolrAbstractResponseParser<T>>();
    var parameters = executor.GetAllParameters(query, queryOptions);
    var responseXml = connection.Get(executor.Handler, parameters);
    xml = XDocument.Parse(responseXml);
    var results = new SolrQueryResults<T>();
    parser.Parse(xml, results);
    return results;
}
public IEnumerable<KeyValuePair<string, int>> GetJsonFacets(
    XDocument xml, string facetFieldName, string countFieldName = "count")
{
    var response = xml.Element("response");
    if (response == null)
    {
        yield break;
    }
    var mainFacetNode = response
        .Elements("lst")
        .FirstOrDefault(e => e.Attribute("name")?.Value == "facets");
    if (mainFacetNode == null)
    {
        yield break;
    }
    var groupFacetElement = mainFacetNode
        .Elements("lst")
        .FirstOrDefault(x => x.Attribute("name")?.Value == facetFieldName);
    if (groupFacetElement == null)
    {
        yield break;
    }
    var buckets = groupFacetElement.Elements("arr")
        .FirstOrDefault(x => x.Attribute("name")?.Value == "buckets");
    if (buckets == null)
    {
        yield break;
    }
    foreach (var bucket in buckets.Elements("lst"))
    {
        var valNode = bucket.Elements()
            .FirstOrDefault(x => x.Attribute("name")?.Value == "val");
        var countNode = bucket.Elements()
            .FirstOrDefault(x => x.Attribute("name")?.Value == countFieldName);
        int count;
        if (valNode != null && countNode != null &&
            int.TryParse(countNode.Value, out count))
        {
            yield return new KeyValuePair<string, int>(valNode.Value, count);
        }
    }
}
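Putting the two helpers together might look like this (a sketch, assuming the extension method and GetJsonFacets above are in scope and the facet in the request is a bucketed terms facet):

XDocument rawXml;
var results = solr.QueryWithRawXml(SolrQuery.All, queryOption, out rawXml);
foreach (var bucket in GetJsonFacets(rawXml, "heartRate"))
{
    Console.WriteLine($"{bucket.Key}: {bucket.Value}");
}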

Adding model list

I have a list of request types. In a for loop I read the data from the database, and I want to add each row to the detail list.
There is an error in the code I wrote: all of the items in the list end up with the same data.
public IHttpActionResult TalepListele(TalepList model)
{
    List<TalepList> detay = new List<TalepList>();
    using (var ctx = new ktdbEntities())
    {
        var query = ctx.talepListele(model.KullaniciId, 0, 10).ToList();
        var adet = query.Count;
        if (query.Count != 0)
        {
            for (var i = 0; i < adet; i++)
            {
                model.OlusturmaTarihi = query[i].olusturulmaTarihi;
                model.TalepDurumAdi = query[i].talepDurumuAdi;
                model.TalepDurumId = query[i].talepTuruID;
                model.TalepTuruAdi = query[i].talepTuruAdi;
                model.TalepTuruId = query[i].talepTuruID;
                model.talepID = query[i].talepID;
                detay.Add(model);
            }
            return Ok(detay);
        }
    }
    return Ok();
}
You are adding the same instance of TalepList to detay multiple times.
for (var i = 0; i < adet; i++)
{
    TalepList mdl = new TalepList();
    mdl.OlusturmaTarihi = query[i].olusturulmaTarihi;
    ...
    detay.Add(mdl);
}
You can try the code below; it is also worth learning about the difference between reference types and value types.
public IHttpActionResult TalepListele(TalepList model)
{
    List<TalepList> detay = new List<TalepList>();
    using (var ctx = new ktdbEntities())
    {
        var query = ctx.talepListele(model.KullaniciId, 0, 10).ToList();
        var adet = query.Count;
        for (var i = 0; i < adet; i++)
        {
            TalepList talep = new TalepList();
            talep.OlusturmaTarihi = query[i].olusturulmaTarihi;
            talep.TalepDurumAdi = query[i].talepDurumuAdi;
            talep.TalepDurumId = query[i].talepTuruID;
            talep.TalepTuruAdi = query[i].talepTuruAdi;
            talep.TalepTuruId = query[i].talepTuruID;
            talep.talepID = query[i].talepID;
            detay.Add(talep);
        }
        return Ok(detay);
    }
    return Ok();
}
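The underlying reference-type behaviour can be seen in isolation: a List<TalepList> stores references, so mutating one instance changes what every entry appears to contain. A tiny sketch (assuming talepID is numeric):

var shared = new TalepList();
var detay = new List<TalepList>();

shared.talepID = 1;
detay.Add(shared);
shared.talepID = 2;
detay.Add(shared);

// Both entries now report talepID == 2, because both list slots
// reference the same TalepList object.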

SqlBulkCopy ColumnMapping error CSV to DataTable to SQL Server Table

I am trying to bulk insert a CSV file into a SQL Server database.
The process is .CSV file to DataTable to SqlBulkCopy to SQL Server.
When I run this I get this error:
The given ColumnMapping does not match up with any column in the source or destination
When I directly import the CSV into SQL Server via Management Studio it works! So I think the problem is my data table conversion?
Here is my code to go from .CSV to DataTable:
public DataTable CsvFileToDataTable(string filePath)
{
    var csvData = new DataTable("Table1");
    using (var csvReader = new TextFieldParser(filePath))
    {
        csvReader.SetDelimiters(new string[] { "," });
        csvReader.HasFieldsEnclosedInQuotes = true;
        var readFields = csvReader.ReadFields();
        foreach (var dataColumn in readFields.Select(column => new DataColumn(column) { AllowDBNull = true }))
        {
            csvData.Columns.Add(dataColumn);
        }
        while (!csvReader.EndOfData)
        {
            var data = csvReader.ReadFields();
            for (var i = 0; i < data.Length; i++)
            {
                if (data[i] == "")
                {
                    data[i] = null;
                }
            }
            csvData.Rows.Add(data);
        }
    }
    return csvData;
}
And here is the code for the bulk copy insert:
public void InsertData(DataTable table)
{
    using (var transactionScope = new TransactionScope())
    {
        using (var sqlConnection = new SqlConnection(this.ConnectionString))
        {
            sqlConnection.Open();
            using (var sqlBulkCopy = new SqlBulkCopy(sqlConnection))
            {
                sqlBulkCopy.DestinationTableName = table.TableName;
                foreach (var column in table.Columns)
                {
                    sqlBulkCopy.ColumnMappings.Add(column.ToString(), column.ToString());
                }
                sqlBulkCopy.WriteToServer(table);
            }
            transactionScope.Complete();
        }
    }
}
Does anyone have any suggestions?
Thanks
It turns out I needed to tweak each method. In the first one:
foreach (var dataColumn in readFields.Select(column => new DataColumn(column, typeof(string)) { AllowDBNull = true, DefaultValue = string.Empty }))
{
    csvData.Columns.Add(dataColumn);
}
and in the second method I changed it to use an index rather than the column name:
for (var count = 0; count < table.Columns.Count; count++)
{
    sqlBulkCopy.ColumnMappings.Add(count, count);
}
I was able to use these code samples to create a working solution that reads a CSV, checks whether it contains data, and if it does, clears out the existing table data and imports all the CSV data into the table:
private static void ProcessFile(string FilePath, string TableName)
{
    var dt = GetDataTable(FilePath, TableName);
    if (dt == null)
    {
        return;
    }
    if (dt.Rows.Count == 0)
    {
        AuditLog.AddInfo("No rows imported after reading file " + FilePath);
        return;
    }
    ClearData(TableName);
    InsertData(dt);
}

private static DataTable GetDataTable(string FilePath, string TableName)
{
    var dt = new DataTable(TableName);
    using (var csvReader = new TextFieldParser(FilePath))
    {
        csvReader.SetDelimiters(new string[] { "," });
        csvReader.HasFieldsEnclosedInQuotes = true;
        var readFields = csvReader.ReadFields();
        if (readFields == null)
        {
            AuditLog.AddInfo("Could not read header fields for file " + FilePath);
            return null;
        }
        foreach (var dataColumn in readFields.Select(column => new DataColumn(column, typeof(string)) { AllowDBNull = true, DefaultValue = string.Empty }))
        {
            dt.Columns.Add(dataColumn);
        }
        while (!csvReader.EndOfData)
        {
            var data = csvReader.ReadFields();
            if (data == null)
            {
                AuditLog.AddInfo(string.Format("Could not read fields on line {0} for file {1}", csvReader.LineNumber, FilePath));
                continue;
            }
            var dr = dt.NewRow();
            for (var i = 0; i < data.Length; i++)
            {
                if (!string.IsNullOrEmpty(data[i]))
                {
                    dr[i] = data[i];
                }
            }
            dt.Rows.Add(dr);
        }
    }
    return dt;
}

private static void ClearData(string TableName)
{
    SqlHelper.ExecuteNonQuery(ConfigurationUtil.ConnectionString, CommandType.Text, "TRUNCATE TABLE " + TableName);
}

private static void InsertData(DataTable table)
{
    using (var sqlBulkCopy = new SqlBulkCopy(ConfigurationUtil.ConnectionString))
    {
        sqlBulkCopy.DestinationTableName = table.TableName;
        for (var count = 0; count < table.Columns.Count; count++)
        {
            sqlBulkCopy.ColumnMappings.Add(count, count);
        }
        sqlBulkCopy.WriteToServer(table);
    }
}
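A call site for this might look like the following (the file path and table name are placeholders; note that ClearData issues a TRUNCATE TABLE, so the connection's login needs ALTER permission on the target table):

ProcessFile(@"C:\imports\MyBulkFile.csv", "dbo.MyImportTable");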
