In an ASP.NET MVC4 application, I'm using the following code to process a GoToWebinar attendees report (CSV format).
For some reason, the file that is being loaded is not released by IIS, and that causes issues when attempting to process another file.
Do you see anything out of the ordinary here?
The CsvReader is from the CsvHelper library: https://joshclose.github.io/CsvHelper/
public AttendeesData GetRecords(string filename, string webinarKey)
{
    StreamReader sr = new StreamReader(Server.MapPath(filename));
    CsvReader csvread = new CsvReader(sr);
    csvread.Configuration.HasHeaderRecord = false;
    List<AttendeeRecord> record = csvread.GetRecords<AttendeeRecord>().ToList();
    record.RemoveRange(0, 7);
    AttendeesData attdata = new AttendeesData();
    attdata.Attendees = new List<Attendee>();
    foreach (var rec in record)
    {
        Attendee aa = new Attendee();
        aa.Webinarkey = webinarKey;
        aa.FullName = String.Concat(rec.First_Name, " ", rec.Last_Name);
        aa.AttendedWebinar = 0;
        aa.Email = rec.Email_Address;
        aa.JoinTime = rec.Join_Time.Replace(" CST", "");
        aa.LeaveTime = rec.Leave_Time.Replace(" CST", "");
        aa.TimeInSession = rec.Time_in_Session.Replace("hour", "hr").Replace("minute", "min");
        aa.Makeup = 0;
        aa.RegistrantKey = Registrants.Where(x => x.email == rec.Email_Address).FirstOrDefault().registrantKey;
        List<string> firstPolls = new List<string>()
        {
            rec.Poll_1.Trim(), rec.Poll_2.Trim(), rec.Poll_3.Trim(), rec.Poll_4.Trim()
        };
        int pass1 = firstPolls.Count(x => x != "");
        List<string> secondPolls = new List<string>()
        {
            rec.Poll_5.Trim(), rec.Poll_6.Trim(), rec.Poll_7.Trim(), rec.Poll_8.Trim()
        };
        int pass2 = secondPolls.Count(x => x != "");
        aa.FirstPollCount = pass1;
        aa.SecondPollCount = pass2;
        if (aa.TimeInSession != "")
        {
            aa.AttendedWebinar = 1;
        }
        if (aa.FirstPollCount == 0 || aa.SecondPollCount == 0)
        {
            aa.AttendedWebinar = 0;
        }
        attdata.Attendees.Add(aa);
        attendeeToDB(aa); // adds to Oracle DB using EF6.
    }
    // Should I call csvread.Dispose() here?
    sr.Close();
    return attdata;
}
Yes. You have to dispose your objects too:
sr.Close();
csvread.Dispose();
sr.Dispose();
A better strategy is to use the using keyword.
You should use usings for your stream readers and writers.
You should follow naming conventions (a List always contains multiple entries, so rename record to records).
You should use clear names (not aa).
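Applied to the method in the question, a using-based version would look something like this (a sketch; it keeps the single-argument CsvReader constructor from the question, which newer CsvHelper versions replace with one that also takes a CultureInfo):
public AttendeesData GetRecords(string filename, string webinarKey)
{
    AttendeesData attdata = new AttendeesData();
    attdata.Attendees = new List<Attendee>();
    // Both the StreamReader and the CsvReader are disposed when the block
    // exits, even if an exception is thrown, so the file handle is released
    // and IIS no longer keeps the file locked.
    using (var sr = new StreamReader(Server.MapPath(filename)))
    using (var csvread = new CsvReader(sr))
    {
        csvread.Configuration.HasHeaderRecord = false;
        List<AttendeeRecord> records = csvread.GetRecords<AttendeeRecord>().ToList();
        records.RemoveRange(0, 7);
        foreach (var rec in records)
        {
            // ... build each Attendee from rec exactly as in the original loop ...
        }
    }
    return attdata;
}
This also removes the need for the explicit Close/Dispose calls at the end of the method.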
I have difficulties understanding this example on how to use facets:
https://lucenenet.apache.org/docs/4.8.0-beta00008/api/Lucene.Net.Demo/Lucene.Net.Demo.Facet.SimpleFacetsExample.html
My goal is to create an index in which each document field has a facet, so that at search time I can choose which facets to use to navigate the data.
What I am confused about is the setup of facets during index creation. To summarize my question: is an index with facets compatible with ReferenceManager?
Does the DirectoryTaxonomyWriter need to be actually written and persisted on disk, or is it embedded into the index itself and just temporary? Given the line indexWriter.AddDocument(config.Build(taxoWriter, doc)); from the example, I expect it is temporary and gets embedded into the index (but then the example also shows you need the Taxonomy to drill down into facets). So can the Taxonomy be tied to the index in some way so that both are handled together by ReferenceManager? If not, may I just use the same folder I use for storing the index?
Here is a more detailed list of the points that confuse me:
In my scenario I am indexing the documents asynchronously (in a background process) and then fetching the index as soon as possible through ReferenceManager in an ASP.NET application. I hope this way of fetching the index is compatible with the DirectoryTaxonomyWriter needed by facets.
I then modified my code, introducing the taxonomy writer as indicated in the example, but I am a bit confused: it seems I can't store the DirectoryTaxonomyWriter in the same folder as the index because the folder is locked. Do I need to persist it, or will it be embedded into the index (so a RAMDirectory is enough)? If I need to persist it in a different directory, can I safely persist it into a subdirectory?
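For reference, this is roughly the separate-directory setup I understand from the SimpleFacetsExample (a sketch only; the "taxonomy" subfolder name is my own assumption, and analyzer/targetDirectory are as in my code below):
using (var indexDir = FSDirectory.Open(targetDirectory))
using (var taxoDir = FSDirectory.Open(Path.Combine(targetDirectory, "taxonomy")))
using (var writer = new IndexWriter(indexDir, new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)))
using (var taxoWriter = new DirectoryTaxonomyWriter(taxoDir))
{
    var config = new FacetsConfig();
    var doc = new Document();
    doc.Add(new StringField("Author", "Bob", Field.Store.YES));
    doc.Add(new FacetField("Author", "Bob"));
    // Build() rewrites the FacetFields and records their paths in the taxonomy,
    // so the taxonomy directory has to be kept alongside the index for drill-down.
    writer.AddDocument(config.Build(taxoWriter, doc));
    writer.Commit();
    taxoWriter.Commit();
}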
Here is the code I am actually using:
private static void BuildIndex(IndexEntry entry)
{
    string targetFolder = ConfigurationManager.AppSettings["IndexFolder"] ?? string.Empty;
    //** LOG
    if (System.IO.Directory.Exists(targetFolder) == false)
    {
        string message = @"Index folder not found";
        _fileLogger.Error(message);
        _consoleLogger.Error(message);
        return;
    }
    var metadata = JsonConvert.DeserializeObject<IndexMetadata>(File.ReadAllText(entry.MetdataPath) ?? "{}");
    string[] header = new string[0];
    List<dynamic> csvRecords = new List<dynamic>();
    using (var reader = new StreamReader(entry.DataPath))
    {
        CsvConfiguration csvConfiguration = new CsvConfiguration(CultureInfo.InvariantCulture);
        csvConfiguration.AllowComments = false;
        csvConfiguration.CountBytes = false;
        csvConfiguration.Delimiter = ",";
        csvConfiguration.DetectColumnCountChanges = false;
        csvConfiguration.Encoding = Encoding.UTF8;
        csvConfiguration.HasHeaderRecord = true;
        csvConfiguration.IgnoreBlankLines = true;
        csvConfiguration.HeaderValidated = null;
        csvConfiguration.MissingFieldFound = null;
        csvConfiguration.TrimOptions = CsvHelper.Configuration.TrimOptions.None;
        csvConfiguration.BadDataFound = null;
        using (var csvReader = new CsvReader(reader, csvConfiguration))
        {
            csvReader.Read();
            csvReader.ReadHeader();
            csvReader.Read();
            header = csvReader.HeaderRecord;
            csvRecords = csvReader.GetRecords<dynamic>().ToList();
        }
    }
    string targetDirectory = Path.Combine(targetFolder, "Index__" + metadata.Boundle + "__" + DateTime.Now.ToString("yyyyMMdd_HHmmss") + "__" + Path.GetRandomFileName().Substring(0, 6));
    System.IO.Directory.CreateDirectory(targetDirectory);
    //** LOG
    {
        string message = @"..creating index : {0}";
        _fileLogger.Information(message, targetDirectory);
        _consoleLogger.Information(message, targetDirectory);
    }
    using (var dir = FSDirectory.Open(targetDirectory))
    {
        using (DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir))
        {
            Analyzer analyzer = metadata.GetAnalyzer();
            var indexConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
            using (IndexWriter writer = new IndexWriter(dir, indexConfig))
            {
                long entryNumber = csvRecords.Count();
                long index = 0;
                long lastPercentage = 0;
                foreach (dynamic csvEntry in csvRecords)
                {
                    Document doc = new Document();
                    IDictionary<string, object> dynamicCsvEntry = (IDictionary<string, object>)csvEntry;
                    var indexedMetadataFiled = metadata.IdexedFields;
                    foreach (string headField in header)
                    {
                        if (indexedMetadataFiled.ContainsKey(headField) == false || (indexedMetadataFiled[headField].NeedToBeIndexed == false && indexedMetadataFiled[headField].NeedToBeStored == false))
                            continue;
                        var field = new Field(headField,
                            ((string)dynamicCsvEntry[headField] ?? string.Empty).ToLower(),
                            indexedMetadataFiled[headField].NeedToBeStored ? Field.Store.YES : Field.Store.NO,
                            indexedMetadataFiled[headField].NeedToBeIndexed ? Field.Index.ANALYZED : Field.Index.NO
                        );
                        doc.Add(field);
                        var facetField = new FacetField(headField, (string)dynamicCsvEntry[headField]);
                        doc.Add(facetField);
                    }
                    long percentage = (long)(((decimal)index / (decimal)entryNumber) * 100m);
                    if (percentage > lastPercentage && percentage % 10 == 0)
                    {
                        _consoleLogger.Information($"..indexing {percentage}%..");
                        lastPercentage = percentage;
                    }
                    writer.AddDocument(doc);
                    index++;
                }
                writer.Commit();
            }
        }
    }
    //** LOG
    {
        string message = @"Index Created : {0}";
        _fileLogger.Information(message, targetDirectory);
        _consoleLogger.Information(message, targetDirectory);
    }
}
I was using this piece of code till today and it was working fine:
for (int page = 1; page <= reader.NumberOfPages; page++)
{
    var cpage = reader.GetPageN(page);
    var content = cpage.Get(PdfName.CONTENTS);
    var ir = (PRIndirectReference)content;
    var value = reader.GetPdfObject(ir.Number);
    if (value.IsStream())
    {
        PRStream stream = (PRStream)value;
        var streamBytes = PdfReader.GetStreamBytes(stream);
        var tokenizer = new PRTokeniser(new RandomAccessFileOrArray(streamBytes));
        try
        {
            while (tokenizer.NextToken())
            {
                if (tokenizer.TokenType == PRTokeniser.TK_STRING)
                {
                    string strs = tokenizer.StringValue;
                    if (!(br = excludeList.Any(st => strs.Contains(st))))
                    {
                        //strfor += tokenizer.StringValue;
                        if (!string.IsNullOrWhiteSpace(strs) &&
                            !stringsList.Any(i => i == strs && excludeHeaders.Contains(strs)))
                            stringsList.Add(strs);
                    }
                }
            }
        }
        finally
        {
            tokenizer.Close();
        }
    }
}
But today I got an exception for some PDF file: Unable to cast object of type 'iTextSharp.text.pdf.PdfArray' to type 'iTextSharp.text.pdf.PRIndirectReference'.
On debugging I found that the error is at this line: var ir = (PRIndirectReference)content;. That's because the page content I'm extracting comes back as an array (a PdfArray) rather than a single indirect reference.
I would be really grateful if anyone could help me with this. Thanks in advance.
EDIT:
The PDF contents are paragraphs, tables, headers & footers, and images in a few cases. But I'm not bothered about images, as I'm bypassing them.
As you can see from the code, I'm trying to add the words into a string list, so I expect the output as plain text; words, to be specific.
That was real easy! Don't know why I couldn't figure it out.
PdfReader reader = new PdfReader(name);
List<string> stringsList = new List<string>();
for (int page = 1; page <= reader.NumberOfPages; page++)
{
    //directly get the contents into a byte stream
    var streamByte = reader.GetPageContent(page);
    var tokenizer = new PRTokeniser(new RandomAccessFileOrArray(streamByte));
    var sb = new StringBuilder(); //use a string builder instead
    try
    {
        while (tokenizer.NextToken())
        {
            if (tokenizer.TokenType == PRTokeniser.TK_STRING)
            {
                var currentText = tokenizer.StringValue;
                currentText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));
                sb.Append(tokenizer.StringValue);
            }
        }
    }
    finally
    {
        //add appended strings into a string list
        if (sb != null)
            stringsList.Add(sb.ToString());
        tokenizer.Close();
    }
}
I have to import 2 CSVs.
CSV 1 [49]: includes about 50 tab-separated columns.
CSV 2 [2]: includes 3 columns which should replace the columns at positions [3], [6] and [11] of my first CSV.
So here's what I do:
1) Import the CSV and split it into an array.
string employeedatabase = "MYPATH";
List<String> status = new List<String>();
StreamReader file2 = new System.IO.StreamReader(filename);
string line = file2.ReadLine();
while ((line = file2.ReadLine()) != null)
{
    string[] ud = line.Split('\t');
    status.Add(ud[0]);
}
String[] ud_status = status.ToArray();
PROBLEM 1: I have about 50 columns to handle, and ud_status is just the first. Do I need 50 Lists and 50 string arrays?
2) Import the second CSV and split it into an array.
List<String> vorname = new List<String>();
List<String> nachname = new List<String>();
List<String> username = new List<String>();
StreamReader file = new System.IO.StreamReader(employeedatabase);
string line3 = file.ReadLine();
while ((line3 = file.ReadLine()) != null)
{
    string[] data = line3.Split(';');
    vorname.Add(data[0]);
    nachname.Add(data[1]);
    username.Add(data[2]);
}
String[] db_vorname = vorname.ToArray();
String[] db_nachname = nachname.ToArray();
String[] db_username = username.ToArray();
PROBLEM 2: After loading these two CSVs, I don't know how to combine them and replace the columns as mentioned above. Something like this?
mynewArray = ud_status + "\t" + ud_xy[..n] + "\t" + changed_colum + ud_xy[..n];
Then save mynewArray as a tab-separated CSV with UTF-8 encoding.
To read the file into a meaningful format, you should set up a class that defines the format of your CSV:
public class CsvRow
{
    public string vorname { get; set; }
    public string nachname { get; set; }
    public string username { get; set; }
    public CsvRow(string[] data)
    {
        vorname = data[0];
        nachname = data[1];
        username = data[2];
    }
}
Then populate a list of this:
List<CsvRow> rows = new List<CsvRow>();
StreamReader file = new System.IO.StreamReader(employeedatabase);
string line3 = file.ReadLine(); // read and discard the header line
while ((line3 = file.ReadLine()) != null)
{
    rows.Add(new CsvRow(line3.Split(';')));
}
Similarly format your other CSV and include unused properties for the new fields. Once you have loaded both, you can populate the new properties from this list in a loop, matching the records by whatever common field the CSVs hopefully share. Then finally output the resulting data to a new CSV file.
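For those last two steps, a minimal sketch could look like this (it assumes the first CSV is loaded into a similar class, here called UdRow, and that the two files share the username field; all of these names are illustrative):
// Copy the replacement values into the matching row of the first CSV.
foreach (UdRow row in udRows) // hypothetical list loaded from CSV 1
{
    CsvRow match = rows.FirstOrDefault(r => r.username == row.username);
    if (match != null)
    {
        row.Column3 = match.vorname;
        row.Column6 = match.nachname;
        row.Column11 = match.username;
    }
}
// Write the merged rows back out, tab-separated and UTF-8 encoded.
using (var writer = new StreamWriter("merged.csv", false, Encoding.UTF8))
{
    foreach (UdRow row in udRows)
        writer.WriteLine(string.Join("\t", row.AllColumns())); // AllColumns(): hypothetical helper returning the ~50 values in order
}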
Your solution is not to use string arrays for this. That will just drive you crazy. It's better to use the System.Data.DataTable object.
I didn't get a chance to test the LINQ expression at the end of this (or really any of it; I wrote this on a break), but it should get you on the right track.
using (var ds = new System.Data.DataSet("My Data"))
{
    ds.Tables.Add("File0");
    ds.Tables.Add("File1");
    string raw;
    using (var reader = new System.IO.StreamReader("FirstFile"))
    {
        //first we get columns for table 0
        foreach (string s in reader.ReadLine().Split('\t'))
            ds.Tables["File0"].Columns.Add(s);
        while ((raw = reader.ReadLine()) != null)
        {
            //and now the rest of the data.
            string[] line = raw.Split('\t');
            var r = ds.Tables["File0"].NewRow();
            for (int i = 0; i < line.Length; i++)
            {
                r[i] = line[i];
            }
            ds.Tables["File0"].Rows.Add(r);
        }
    }
    //we could probably do these in a loop or a second method,
    //but you may want subtle differences, so for now we just do it the same way
    //for file1
    using (var reader2 = new System.IO.StreamReader("SecondFile"))
    {
        foreach (string s in reader2.ReadLine().Split('\t'))
            ds.Tables["File1"].Columns.Add(s);
        while ((raw = reader2.ReadLine()) != null)
        {
            //and now the rest of the data.
            string[] line = raw.Split('\t');
            var r = ds.Tables["File1"].NewRow();
            for (int i = 0; i < line.Length; i++)
            {
                r[i] = line[i];
            }
            ds.Tables["File1"].Rows.Add(r);
        }
    }
    //you now have these in functioning datatables. Because we named columns,
    //you can call them by name specifically, or by index, to replace in the first datatable.
    string[] columnsToReplace = new string[] { "firstColumnName", "SecondColumnName", "ThirdColumnName" };
    for (int i = 0; i < ds.Tables[0].Rows.Count; i++)
    {
        //you didn't give a sign of any relation between the two tables
        //so this is just by row, and assumes the row count is equivalent.
        //This is also not advised.
        //if there is a key these sets of data share
        //you should join on them instead.
        DataRow dr = ds.Tables[0].Rows[i];
        dr[3] = ds.Tables[1].Rows[i][columnsToReplace[0]];
        dr[6] = ds.Tables[1].Rows[i][columnsToReplace[1]];
        dr[11] = ds.Tables[1].Rows[i][columnsToReplace[2]];
    }
    //ds.Tables[0] now has the output you want.
    string output = String.Empty;
    foreach (DataColumn c in ds.Tables[0].Columns)
        output = String.Concat(output, c.ColumnName, "\t");
    output = String.Concat(output, Environment.NewLine); // columns ready, now the rows.
    foreach (DataRow r in ds.Tables[0].Rows)
        output = String.Concat(output, String.Join("\t", r.ItemArray), Environment.NewLine);
    using (System.IO.StreamWriter file = new System.IO.StreamWriter("MYPATH")) //or a variable instead of a string literal
    {
        file.Write(output);
    }
}
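As the comments in the code say, if the two files do share a key, joining on it is safer than pairing rows by index. A rough sketch of that replacement loop using LINQ to DataSet, inside the same using block (the "KeyColumn" name is hypothetical, and it needs a reference to System.Data.DataSetExtensions):
var joined = from r0 in ds.Tables["File0"].AsEnumerable()
             join r1 in ds.Tables["File1"].AsEnumerable()
                 on r0.Field<string>("KeyColumn") equals r1.Field<string>("KeyColumn")
             select new { r0, r1 };
foreach (var pair in joined)
{
    // copy the replacement values across once the rows are matched by key
    pair.r0[3] = pair.r1["firstColumnName"];
    pair.r0[6] = pair.r1["SecondColumnName"];
    pair.r0[11] = pair.r1["ThirdColumnName"];
}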
With Cinchoo ETL, an open source file helper library, you can merge the CSV files as shown below. This assumes the two CSV files contain the same number of lines.
string CSV1 = @"Id	Name	City
1	Tom	New York
2	Mark	FairFax";
string CSV2 = @"Id	City
1	Las Vegas
2	Dallas";
dynamic rec1 = null;
dynamic rec2 = null;
StringBuilder csv3 = new StringBuilder();
using (var csvOut = new ChoCSVWriter(new StringWriter(csv3))
    .WithFirstLineHeader()
    .WithDelimiter("\t")
    )
{
    using (var csv1 = new ChoCSVReader(new StringReader(CSV1))
        .WithFirstLineHeader()
        .WithDelimiter("\t")
        )
    {
        using (var csv2 = new ChoCSVReader(new StringReader(CSV2))
            .WithFirstLineHeader()
            .WithDelimiter("\t")
            )
        {
            while ((rec1 = csv1.Read()) != null && (rec2 = csv2.Read()) != null)
            {
                rec1.City = rec2.City;
                csvOut.Write(rec1);
            }
        }
    }
}
Console.WriteLine(csv3.ToString());
Hope it helps.
Disclaimer: I'm the author of this library.
I am trying to search my text file for a sentence by date and by name. Then I want to load that sentence into some textboxes on my form. I am new to VS but I know some things about C.
I have come up with this code:
string pattern = dateTimePicker1Is.Text;
string pattern1 = _txtNameIs.Text;
IList<string> result = new List<string>();
using (var reader = new StreamReader(@"C:\Users\user\Desktop\Testet System\isto.txt"))
{
    string currentLine;
    while ((currentLine = reader.ReadLine()) != null)
    {
        if (currentLine.Contains(pattern))
        {
            if (currentLine.Contains(pattern1))
            {
                result.Add(currentLine);
                string[] tempArray = currentLine.Split(',');
                _txtNameIs.Text = tempArray[0];
                _txtSurnameIs.Text = tempArray[1];
                _txtApokxT.Text = tempArray[2];
                _txtApoktT.Text = tempArray[3];
                _txtEpanxT.Text = tempArray[4];
                _txtEpandT.Text = tempArray[5];
                _txtApokkT.Text = tempArray[6];
                _txtEpankT.Text = tempArray[7];
                _txtApoksT.Text = tempArray[8];
                _txtEpansT.Text = tempArray[9];
                _txtGenSun.Text = tempArray[10];
                break;
            }
            else
            {
                MessageBox.Show("There are no records!");
            }
        }
    }
}
When I press the search button it loads all the data that I need, but if I have the same name and a different date, a message box pops up saying 'There are no records!'.
I believe there is a better way of doing this:
string pattern = dateTimePicker1Is.Text;
string pattern1 = _txtNameIs.Text;
string pathToFile = @"C:\Users\user\Desktop\Testet System\isto.txt";
List<string> result = new List<string>();
foreach (var line in File.ReadAllLines(pathToFile))
{
    if (line.Contains(pattern) && line.Contains(pattern1))
    {
        result.Add(line);
        string[] tempArray = line.Split(',');
        if (tempArray.Length >= 11)
        {
            _txtNameIs.Text = tempArray[0];
            _txtSurnameIs.Text = tempArray[1];
            _txtApokxT.Text = tempArray[2];
            _txtApoktT.Text = tempArray[3];
            _txtEpanxT.Text = tempArray[4];
            _txtEpandT.Text = tempArray[5];
            _txtApokkT.Text = tempArray[6];
            _txtEpankT.Text = tempArray[7];
            _txtApoksT.Text = tempArray[8];
            _txtEpansT.Text = tempArray[9];
            _txtGenSun.Text = tempArray[10];
            break;
        }
    }
}
It is better to show the 'no records' message only when the list count is 0 after the loop.
I think your 'There are no records' error should happen AFTER the loop, IF no matches were found. Right? So record whether a match was found in a bool, and after the loop, if the flag is still false, show your error.
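A minimal sketch of that flag, applied to the File.ReadAllLines loop above (isMatchFound is an illustrative name):
bool isMatchFound = false;
foreach (var line in File.ReadAllLines(pathToFile))
{
    if (line.Contains(pattern) && line.Contains(pattern1))
    {
        isMatchFound = true;
        // ... fill the textboxes from line.Split(',') as above ...
        break;
    }
}
// The message is shown once, only when no line in the whole file matched.
if (!isMatchFound)
    MessageBox.Show("There are no records!");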
I'm importing records from a text file into my DB. I have it set up to check each record to ensure there is not already a duplicate in the database. If there is already an entry in the database, it skips that record.
Now the issue I need to address is when there is not a record in the text file but there is a record in the database. I need to remove any record from the database that does not match a record in the file.
public static void ParseComplaint(string location)
{
    Console.WriteLine("Parsing.....");
    List<AutoMakeNoEntity> am = DBCacheHelper.GetAllMakes().ToList<AutoMakeNoEntity>();
    using (var reader = new StreamReader(location))
    {
        foreach (string line in File.ReadLines(location))
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }
            var tokens = line.Trim().Split(new char[] { '\t' });
            if (am.Any(c => c.MakeName == tokens[3]))
            {
                using (RecallsContext context = new RecallsContext())
                {
                    string tmp = tokens[0];
                    if (!context.complaints.Any(c => c.CMPLID == tmp))
                    {
                        var recalls = new Complaints();
                        recalls.CMPLID = tokens[0];
                        recalls.ODINO = tokens[1];
                        recalls.MFR_NAME = tokens[2];
                        recalls.MAKETXT = tokens[3];
                        recalls.MODELTXT = tokens[4];
                        recalls.YEARTXT = tokens[5];
                        recalls.CRASH = tokens[6];
                        recalls.FAILDATE = tokens[7];
                        recalls.FIRE = tokens[8];
                        recalls.INJURED = tokens[9];
                        recalls.DEATHS = tokens[10];
                        recalls.COMPDESC = tokens[11];
                        recalls.CITY = tokens[12];
                        recalls.STATE = tokens[13];
                        recalls.VIN = tokens[14];
                        recalls.DATEA = tokens[15];
                        recalls.LDATE = tokens[16];
                        recalls.MILES = tokens[17];
                        recalls.OCCURENCES = tokens[18];
                        recalls.CDESCR = tokens[19];
                        recalls.CMPL_TYPE = tokens[20];
                        recalls.POLICE_RPT_YN = tokens[21];
                        recalls.PURCH_DT = tokens[22];
                        recalls.ORIG_OWNER_YN = tokens[23];
                        recalls.ANTI_BRAKES_YN = tokens[24];
                        recalls.CRUISE_CONT_YN = tokens[25];
                        recalls.NUM_CYLS = tokens[26];
                        recalls.DRIVE_TRAIN = tokens[27];
                        recalls.FUEL_SYS = tokens[28];
                        recalls.FUEL_TYPE = tokens[29];
                        recalls.TRANS_TYPE = tokens[30];
                        recalls.VEH_SPEED = tokens[31];
                        recalls.DOT = tokens[32];
                        recalls.TIRE_SIZE = tokens[33];
                        recalls.LOC_OF_TIRE = tokens[34];
                        recalls.TIRE_FAIL_TYPE = tokens[35];
                        recalls.ORIG_EQUIP_YN = tokens[36];
                        recalls.MANUF_DT = tokens[37];
                        recalls.SEAT_TYPE = tokens[38];
                        recalls.RESTRAINT_TYPE = tokens[39];
                        recalls.DEALER_NAME = tokens[40];
                        recalls.DEALER_TEL = tokens[41];
                        recalls.DEALER_CITY = tokens[42];
                        recalls.DEALER_STATE = tokens[43];
                        recalls.DEALER_ZIP = tokens[44];
                        recalls.PROD_TYPE = tokens[45];
                        if (tokens.Length == 47)
                        {
                            recalls.REPAIRED_YN = tokens[46];
                        }
                        context.complaints.Add(recalls);
                        context.SaveChanges();
                        recalls.Dispose();
                    }
                }
            }
        }
    }
}
I need to remove the record from the database that does not match any records in the file.
If you want the database to match the contents of the text file, why don't you simply empty it first?
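Alternatively, to delete only the stale rows instead of emptying the table, a minimal sketch with EF6 could look like this (it reuses RecallsContext and the complaints set from the question, EF6's RemoveRange, and assumes the CMPLID in column 0 uniquely identifies a record):
// Collect every complaint ID that appears in the file.
var fileIds = new HashSet<string>(
    File.ReadLines(location)
        .Where(l => !string.IsNullOrWhiteSpace(l))
        .Select(l => l.Trim().Split('\t')[0]));
using (var context = new RecallsContext())
{
    // Load the rows whose CMPLID is no longer present in the file and delete them.
    var stale = context.complaints
        .Where(c => !fileIds.Contains(c.CMPLID))
        .ToList();
    context.complaints.RemoveRange(stale);
    context.SaveChanges();
}
For very large files the Contains call translates into a big SQL IN list, so batching the IDs or staging them in a table may be needed.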