I'm trying to read some files with ReadLine, but my file have some break lines that I need to catch (not all of them), and I don't know how to get them in the same array, neither in any other array with these separators... because... ReadLine reads lines, and break these lines, huh?
I can't replace these because I need to check it after the process, so I need to get the breaklines AND the content after that. That's the problem. How can I do that?
Here's my code:
public class ReadFile
{
string extension;
string filename;
System.IO.StreamReader sr;
public ReadFile(string arquivo, System.IO.StreamReader sr)
{
string ext = Path.GetExtension(arquivo);
sr = new StreamReader(arquivo, System.Text.Encoding.Default);
this.sr = sr;
this.extension = ext;
this.filename = Path.GetFileNameWithoutExtension(arquivo);
if (ext.Equals(".EXP", StringComparison.OrdinalIgnoreCase))
{
ReadEXP(arquivo);
}
else MessageBox.Show("Extensão de arquivo não suportada: "+ext);
}
public void ReadEXP(string arquivo)
{
string line = sr.ReadLine();
string[] words;
string[] Separators = new string[] { "<Segment>", "</Segment>", "<Source>", "</Source>", "<Target>", "</Target>" };
string ID = null;
string Source = null;
string Target = null;
DataBase db = new DataBase();
//db.CreateTable_EXP(filename);
db.CreateTable_EXP();
while ((line = sr.ReadLine()) != null)
{
try
{
if (line.Contains("<Segment>"))
{
ID = "";
words = line.Split(Separators, StringSplitOptions.None);
ID = words[0];
for (int i = 1; i < words.Length; i++ )
ID += words[i];
MessageBox.Show("Segment[" + words.Length + "]: " + ID);
}
if (line.Contains("<Source>"))
{
Source = "";
words = line.Split(Separators, StringSplitOptions.None);
Source = words[0];
for (int i = 1; i < words.Length; i++)
Source += words[i];
MessageBox.Show("Source[" + words.Length + "]: " + Source);
}
if (line.Contains("<Target>"))
{
Target = "";
words = line.Split(Separators, StringSplitOptions.None);
Target = words[0];
for (int i = 1; i < words.Length; i++)
Target += words[i];
MessageBox.Show("Target[" + words.Length + "]: " + Target);
db.PopulateTable_EXP(ID, Source, Target);
MessageBox.Show("ID: " + ID + "\nSource: " + Source + "\nTarget: " + Target);
}
}
catch (IndexOutOfRangeException e)
{
MessageBox.Show(e.Message.ToString());
MessageBox.Show("ID: " + ID + "\nSource: " + Source + "\nTarget: " + Target);
}
}
return;
}
If you are trying to read XML, try using the built in libaries, here is a simple example of loading a section of XML with <TopLevelTag> in it.
var xmlData = XDocument.Load(#"C:\folder\file.xml").Element("TopLevelTag");
if (xmlData == null) throw new Exception("Failed To Load XML");
Here is a tidy way to get content without it throwing an exception if missing from the XML.
var xmlBit = (string)xmlData.Element("SomeSubTag") ?? "";
If you really have to roll your own, then look at examples for CSV parsers,
where ReadBlock can be used to get the raw data including line breaks.
private char[] chunkBuffer = new char[4096];
var fileStream = new System.IO.StreamReader(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));
var chunkLength = fileStream.ReadBlock(chunkBuffer, 0, chunkBuffer.Length);
Related
I am having problems with reading Korean characters from datagridview. I read korean alphabets from csv and written them to datagridview. My method is as follows:
int encoder = 949;
public void DisplayPCM()
{
StreamReader sr;
Encoding korean = Encoding.GetEncoding(encoder);
string selectedDirectory = projectPath + "\\Spec";
string filePath = selectedDirectory + "\\" + functionfile;
pcmpath = new StringBuilder(filePath);
if (Directory.Exists(selectedDirectory))
{
if (File.Exists(filePath))
{
try
{
sr = new StreamReader(filePath, korean);
int lineCount = 0;
StringBuilder readLine = new StringBuilder(sr.ReadLine());
while (readLine.ToString() != null && readLine.ToString() != "")
{
string[] substr = readLine.ToString().Split(',');
if (lineCount >= 1)
{
dPCM.Rows[lineCount - 1].Cells[0].Value = substr[0];
dPCM.Rows[lineCount - 1].Cells[1].Value = substr[1];
dPCM.Rows[lineCount - 1].Cells[2].Value = substr[2];
dPCM.Rows[lineCount - 1].Cells[3].Value = substr[3];
}
readLine = new StringBuilder(sr.ReadLine());
lineCount++;
}
sr.Close();
}
catch
{
MessageBox.Show("Error in Part File \n" + filePath);
}
}
else
{
MessageBox.Show(filePath + "\n does not exist.");
}
}
else
{
}
}
Until this point, there are no problems. The characters in datagridview are displayed in korean alphabets.
Now i have to edit the text in datagridview and read back the text and save it in csv file and reload this csv file text to datagridview.
When I did this procedure, I get some chinese text. It looks like when reading the string from datagridview, there is a problem. For reading datagridview text and saving it in csv file, my steps are as follows:
private void Save_PCM()
{
DirectoryInfo di;
StreamWriter sw = null;
int rowcount = 0;
StringBuilder Newnum, NewName, Newlower, Newupper;
try
{
Encoding korean = Encoding.GetEncoding(encoder);
string selectedDirectory = projectPath + "\\Spec";
string filePath = selectedDirectory + "\\" + functionfile;
pcmpath = new StringBuilder(filePath);
sw = new StreamWriter(pcmpath.ToString());
sw.WriteLine("NO,NAME,Lower SPEC,Upper SPEC");
while (dPCM.Rows[rowcount].Cells[0].Value != null)
{
Newnum = new StringBuilder(dPCM.Rows[rowcount].Cells[0].Value.ToString());
NewName = new StringBuilder(dPCM.Rows[rowcount].Cells[1].Value.ToString());
Newlower = new StringBuilder(dPCM.Rows[rowcount].Cells[2].Value.ToString());
Newupper = new StringBuilder(dPCM.Rows[rowcount].Cells[3].Value.ToString());
if (Newnum == null || NewName == null || Newlower == null || Newupper == null)
break;
sw.WriteLine(Newnum + "," + NewName + "," + Newlower + "," + Newupper);
rowcount++;
}
sw.Close();
}
catch (IOException e)
{
MessageBox.Show("Project name error" + Environment.NewLine + e.Message);
}
catch (NullReferenceException e)
{
MessageBox.Show("Enter all values in PCM" + Environment.NewLine + e.Message);
}
finally
{
if (sw != null)
sw.Close();
}
}
I think that I have to read datagridview cell values by including some encoding methods but I am clueless.
How can I read the text encoded in korean language from datagridview. Thanks in advance.
Now it looks like it works. I thought that the problem was in reading the encoded text from datagridview but the problem was in writing the encoded text to csv file. I made changes in function to read the data into .csv file. I declared a filestream path and added the filestream path and encoding to the streamwriter.
private void Save_PCM()
{
DirectoryInfo di;
FileStream stream = null;
StreamWriter sw = null;
int rowcount = 0;
StringBuilder Newnum, NewName, Newlower, Newupper;
try
{
Encoding korean = Encoding.GetEncoding(encoder);
string selectedDirectory = projectPath + "\\Spec";
string filePath = selectedDirectory + "\\" + functionfile;
pcmpath = new StringBuilder(filePath);
stream = new FileStream(pcmpath.ToString(), FileMode.Create); // overwrite existing file
sw = new StreamWriter(stream, korean);
sw.WriteLine("NO,NAME,Lower SPEC,Upper SPEC");
while (dPCM.Rows[rowcount].Cells[0].Value != null)
{
Newnum = new StringBuilder(dPCM.Rows[rowcount].Cells[0].Value.ToString());
byte[] bb = korean.GetBytes(dPCM.Rows[rowcount].Cells[1].Value.ToString());
NewName = new StringBuilder(Encoding.Default.GetString(bb));
Newlower = new StringBuilder(dPCM.Rows[rowcount].Cells[2].Value.ToString());
Newupper = new StringBuilder(dPCM.Rows[rowcount].Cells[3].Value.ToString());
if (Newnum == null || NewName == null || Newlower == null || Newupper == null)
break;
sw.WriteLine(Newnum + "," + NewName + "," + Newlower + "," + Newupper);
rowcount++;
}
sw.Close();
}
catch (IOException e)
{
MessageBox.Show("Project name error" + Environment.NewLine + e.Message);
}
catch (NullReferenceException e)
{
MessageBox.Show("Enter all values in PCM" + Environment.NewLine + e.Message);
}
finally
{
if (sw != null)
sw.Close();
}
}
I have a csv file, and I need to add a unique ID based on the first two characters of the file. I have the following code:
using (StreamReader sr = new StreamReader(f))
{
string currentLine;
int id = 0;
while ((currentLine = sr.ReadLine()) != null)
{
string row = currentLine.ToString();
string FirstTwoCharacters = currentLine.Substring(0, 2);
if (FirstTwoCharacters == "01")
{
id = id + 1;
row += "*" + id.ToString();
using (StreamWriter files = File.AppendText(dir + newfilename))
{
files.WriteLine(row);
}
}
else
{
row += "*" + id.ToString();
using (StreamWriter files = File.AppendText(dir + newfilename))
{
files.WriteLine(row);
}
}
}
}
The csv files can be huge, 1Gb in size, around 6 million rows. Just wanted advice, if there is a quicker way to handling this, as it currently can take 3+ hours to process a file, and multiple files can be received in one go.
Instead of opening new file for appending line for each line of input file you can keep stream writer opened:
using (StreamReader sr = new StreamReader(f))
using (StreamWriter files = File.AppendText(dir + newfilename))
{
string currentLine;
int id = 0;
while ((currentLine = sr.ReadLine()) != null)
{
string firstTwoCharacters = currentLine.Substring(0, 2);
if (firstTwoCharacters == "01")
id++;
files.WriteLine(currentLine + "*" + id);
}
}
You can also use File.ReadLines to enumerate source lines:
using (StreamWriter writer = File.AppendText(dir + newfilename))
{
int id = 0;
foreach(var line in File.ReadLines(f))
{
if (line.Substring(0,2) == "01")
id++;
writer.WriteLine($"{line}*{id}");
}
}
Or even LINQ approach
int id = 0;
var newLines = from line in File.ReadLines(f)
let incrementId = line.Substring(0,2) == "01"
select $"{line}*{incrementId ? (++id) : id}";
File.WriteAllLines(dir + newfilename, newLines);
opening the (File.AppendText) inside the while loop is costly, move this to outside the while
using (StreamReader sr = new StreamReader(f))
{
string currentLine;
int id = 0;
using (StreamWriter files = File.AppendText(dir + newfilename))
{
while ((currentLine = sr.ReadLine()) != null)
{
string row = currentLine.ToString();
string FirstTwoCharacters = currentLine.Substring(0, 2);
if (FirstTwoCharacters == "01")
{
id = id + 1;
row += "*" + id.ToString();
files.WriteLine(row);
}
else
{
row += "*" + id.ToString();
files.WriteLine(row);
}
}
}
}
I'm trying to execute an SSIS package with a scripting component in Visual Basic 2010. I get the following error when I execute the package:
public void Main()
{
// TODO: Custom Code starts here
/*
* Description: Reads the input CMI Stats files and converts into a more readable format
* This Code for Better CMI Parser is converted as per SC's original code by S.A. on 3/6/2014
* Here is the description from original procedure
* CustType = DOMESTIC/INTERNATIONAL/ETC
* CategoryType = SBU/MAN
* Category = Actual value (AI/CC/etc)
* DataType = INCOMING or SHIP (or something else later?)
*
* 3/23/2010
* Uncommented the CAD file load....
*/
string[,] filesToProcess = new string[2, 2] { {(String)Dts.Variables["csvFileNameUSD"].Value,"USD" }, {(String)Dts.Variables["csvFileNameCAD"].Value,"CAD" } };
string headline = "CustType,CategoryType,CategoryValue,DataType,Stock QTY,Stock Value,Floor QTY,Floor Value,Order Count,Currency";
string outPutFile = Dts.Variables["outputFile"].Value.ToString();
//Declare Output files to write to
FileStream sw = new System.IO.FileStream(outPutFile, System.IO.FileMode.Create);
StreamWriter w = new StreamWriter(sw);
w.WriteLine(headline);
//Loop Through the files one by one and write to output Files
for (int x = 0; x < filesToProcess.GetLength(1); x++)
{
if (System.IO.File.Exists(filesToProcess[x, 0]))
{
string categoryType = "";
string custType = "";
string dataType = "";
string categoryValue = "";
//Read the input file in memory and close after done
StreamReader sr = new StreamReader(filesToProcess[x, 0]);
string fileText = sr.ReadToEnd();
string[] lines = fileText.Split(Convert.ToString(System.Environment.NewLine).ToCharArray());
sr.Close();
//Read String line by line and write the lines with params from sub headers
foreach (string line in lines)
{
if (line.Split(',').Length > 3)
{
string lineWrite = "";
lineWrite = line;
string[] cols = line.Split(',');
if (HeaderLine(cols[1]))
{
string[] llist = cols[0].Split();
categoryType = llist[llist.Length - 1];
custType = llist[0];
dataType = llist[1];
if (dataType == "COMPANY")
{
custType = llist[0] + " " + llist[1];
dataType = llist[2];
}
}
if (cols[0].Contains("GRAND"))
{
categoryValue = "Total";
}
else
{
string[] col0 = cols[0].Split(' ');
categoryValue = col0[col0.Length - 1];
}
int z = 0;
string[] vals = new string[cols.Length];
for (int i = 1; i < cols.Length - 1; i++)
{
vals[z] = cols[i].Replace(',', ' ');
z++;
}
//line = ",".join([CustType, CategoryType, CategoryValue, DataType, vals[0], vals[1], vals[2], vals[3], vals[6], currency])
lineWrite = clean(custType) + "," + clean(categoryType) + "," + clean(categoryValue) + ","
+ clean(dataType) + "," + clean(vals[0]) + "," + clean(vals[1]) + "," + clean(vals[2])
+ "," + clean(vals[3]) + "," + clean(vals[6]) + "," + filesToProcess[x, 1];
if (!HeaderLine(line))
{
w.WriteLine(lineWrite);
w.Flush();
}
}
}
}
}
w.Close();
sw.Close();
//Custom Code ends here
Dts.TaskResult = (int)ScriptResults.Success;
}
public bool HeaderLine(String line)
{
return line.Contains("Stock Qty");
}
public string clean(string str)
{
if (str != null)
return Regex.Replace(str,#"[""]","");
//return str.Replace('"', ' ');
else
return "";
}
#region ScriptResults declaration
/// <summary>
/// This enum provides a convenient shorthand within the scope of this class for setting the
/// result of the script.
///
/// This code was generated automatically.
/// </summary>
enum ScriptResults
{
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
};
#endregion
}
}
Can anyone suggest what could have possibly gone wrong or maybe how to debug this code in order to understand the errors?
Thanks!
Here's how you debug scripts in SSIS
With the code open, put a breakpoint
Close the code
Run the package
When the script starts running, it will open up a code window and you can walk through the code step by step
File A B contains million urls.
1, go through the url in file A one by one.
2, extract subdomain.com (http://subdomain.com/path/file)
3, if subdomain.com exist file B, save it to file C.
Any quickest way to get file C with c#?
Thanks.
when i use readline, it have no much different.
// stat
DateTime start = DateTime.Now;
int totalcount = 0;
int n1;
if (!int.TryParse(num1.Text, out n1))
n1 = 0;
// memory
dZLinklist = new Dictionary<string, string>();
// read file
string fileName = openFileDialog1.FileName; // get file name
textBox1.Text = fileName;
StreamReader sr = new StreamReader(textBox1.Text);
string fullfile = File.ReadAllText(#textBox1.Text);
string[] sArray = fullfile.Split( '\n');
//IEnumerable<string> sArray = tool.GetSplit(fullfile, '\n');
//string sLine = "";
//while (sLine != null)
foreach ( string sLine in sArray)
{
totalcount++;
//sLine = sr.ReadLine();
if (sLine != null)
{
//string reg = "http[s]*://.*?/";
//Regex R = new Regex(reg, RegexOptions.Compiled);
//Match m = R.Match(sLine);
//if(m.Success)
int length = sLine.IndexOf(' ', n1); // default http://
if(length > 0)
{
//string urls = sLine.Substring(0, length);
dZLinklist[sLine.Substring(0,length)] = sLine;
}
}
}
TimeSpan time = DateTime.Now - start;
int count = dZLinklist.Count;
double sec = Math.Round(time.TotalSeconds,2);
label1.Text = "(" + totalcount + ")" + count.ToString() + " / " + sec + " = " + (Math.Round(count / sec,2)).ToString();
sr.Close();
I would go for using Microsoft LogParser for processing big files: MS LogParser. Are you limited to implement it in described way only?
currently have the following code:
string[] fileLineString = File.ReadAllLines(Server.MapPath("~") + "/App_Data/Users.txt");
for (int i = 0; i < fileLineString.Length; i++)
{
string[] userPasswordPair = fileLineString[i].Split(' ');
if (Session["user"].ToString() == userPasswordPair[0])
{
userPasswordPair[i].Replace(userPasswordPair[1], newPasswordTextBox.Text);
}
}
}
the text file is set out as: 'username' 'password
what i'm trying to do is be able to edit the password and replace it with a new one using my code, but my code seems to do nothing and the text file just stays the same.
string[] fileLineString = File.ReadAllLines(Server.MapPath("~") + "/App_Data/Users.txt");
for (int i = 0; i < fileLineString.Length; i++)
{
string[] userPasswordPair = fileLineString[i].Split(' ');
if (Session["user"].ToString() == userPasswordPair[0])
{
// set the new password in the same list and save the file
fileLineString[i] = Session["user"].ToString() + " " + newPasswordTextBox.Text;
File.WriteAllLines((Server.MapPath("~") + "/App_Data/Users.txt"), fileLineString);
break; // exit from the for loop
}
}
At the moment, you're not storing the file.
Your replace is not assigned to a variable (Replace does not edit or write anything, it just returns the new string object).
Corrected code:
string[] fileLineString = File.ReadAllLines(Server.MapPath("~") + "/App_Data/Users.txt");
for (int i = 0; i < fileLineString.Length; i++)
{
string[] userPasswordPair = fileLineString[i].Split(' ');
if (Session["user"].ToString() == userPasswordPair[0])
{
fileLineString[i] = fileLineString[i].Replace(userPasswordPair[1], newPasswordTextBox.Text);
break;
}
}
File.WriteAllLines((Server.MapPath("~") + "/App_Data/Users.txt", fileLineString);
String _userName = "User";
String _newPassword = "Password";
// Reading All line from file
String _fileContent = System.IO.File.ReadAllLines("filePath").ToString();
// Pattern which user password like to changed
string _regPettern = String.Format(#"{0} ?(?<pwd>\w+)[\s\S]*?", _userName);
Regex _regex2 = new Regex(_regPettern, RegexOptions.IgnoreCase);
String _outPut = Regex.Replace(_fileContent, _regPettern, m => m.Groups[1] + " " + _newPassword);
// Writing to file file
System.IO.File.WriteAllText("filePath", _outPut);