I would like to ask some tips and help on a reading/writing part of my C#.
Situation:
I have to read a CSV file; - OK
If the CSV file name starts with "Load_", I want to write on another CSV the data from line 2 to the last one;
If the CSV file name starts with "RO_", I want to write on 2 different CSVs, 1 with the line 1 to 4 and the other 4 to the last one;
What I have so far is:
public static void ProcessFile(string[] ProcessFile)
{
// Keeps track of your current position within a record
int wCurrLine = 0;
// Number of rows in the file that constitute a record
const int LINES_PER_ROW = 1;
int ctr = 0;
foreach (string filename in ProcessFile)
{
var sbText = new System.Text.StringBuilder(100000);
int stop_line = 0;
int start_line = 0;
// Used for the output name of the file
var dir = Path.GetDirectoryName(filename);
var fileName = Path.GetFileNameWithoutExtension(filename);
var ext = Path.GetExtension(filename);
var folderbefore = Path.GetFullPath(Path.Combine(dir, #"..\"));
var lineCount = File.ReadAllLines(#filename).Length;
string outputname = folderbefore + "output\\" + fileName;
using (StreamReader Reader = new StreamReader(#filename))
{
if (filename.Contains("RO_"))
{
start_line = 1;
stop_line = 5;
}
else
{
start_line = 2;
stop_line = lineCount;
}
ctr = 0;
while (!Reader.EndOfStream && ctr < stop_line)
{
// Add the text
sbText.Append(Reader.ReadLine());
// Increment our current record row counter
wCurrLine++;
// If we have read all of the rows for this record
if (wCurrLine == LINES_PER_ROW)
{
// Add a line to our buffer
sbText.AppendLine();
// And reset our record row count
wCurrLine = 0;
}
ctr++;
} // end of the while
}
int total_lenght = sbText.Length
// When all of the data has been loaded, write it to the text box in one fell swoop
using (StreamWriter Writer = new StreamWriter(dir + "\\" + "output\\" + fileName + "_out" + ext))
{
Writer.Write.(sbText.);
}
} // end of the foreach
} // end of ProcessFile
I was thinking about using the IF/ELSE: "using (StreamWriter Writer = new StreamWriter(dir + "\" + "output\" + fileName + "_out" + ext))" part. However, I am not sure how to pass, to StreamWriter, to only write from/to a specific line number.
Any Help is welcome! If I am missing some information, please, let me know (I am pretty new on stackoverflow).
Thank you.
Code is way too complicated
using System.Collections.ObjectModel;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication57
{
class Program
{
static void Main(string[] args)
{
}
public static void ProcessFile(string[] ProcessFile)
{
foreach (string filename in ProcessFile)
{
// Used for the output name of the file
var dir = Path.GetDirectoryName(filename);
var fileName = Path.GetFileNameWithoutExtension(filename);
var ext = Path.GetExtension(filename);
var folderbefore = Path.GetFullPath(Path.Combine(dir, #"..\"));
var lineCount = File.ReadAllLines(#filename).Length;
string outputname = folderbefore + "output\\" + fileName;
using (StreamWriter Writer = new StreamWriter(dir + "\\" + "output\\" + fileName + "_out" + ext))
{
int rowCount = 0;
using (StreamReader Reader = new StreamReader(#filename))
{
rowCount++;
string inputLine = "";
while ((inputLine = Reader.ReadLine()) != null)
{
if (filename.Contains("RO_"))
{
if (rowCount <= 4)
{
Writer.WriteLine(inputLine);
}
if (rowCount == 4) break;
}
else
{
if (rowCount >= 2)
{
Writer.WriteLine(inputLine);
}
}
} // end of the while
Writer.Flush();
}
}
} // end of the foreach
} // end of ProcessFile
}
}
You can use LINQ to Take and Skip lines.
public abstract class CsvProcessor
{
private readonly IEnumerable<string> processFiles;
public CsvProcessor(IEnumerable<string> processFiles)
{
this.processFiles = processFiles;
}
protected virtual IEnumerable<string> GetAllLinesFromFile(string fileName)
{
using(var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
using(var reader = new StreamReader(stream))
{
var line = String.Empty;
while((line = reader.ReadLine()) != null)
{
yield return line;
}
}
}
protected virtual void ProcessFiles()
{
var sb1 = new StringBuilder();
var sb2 = new StringBuilder();
foreach(var file in this.processFiles)
{
var fileName = Path.GetFileNameWithoutExtension(file);
var lines = GetAllLinesFromFile(file);
if(fileName.StartsWith("RO_", StringComparison.InvariantCultureIgnoreCase))
{
sb1.AppendLine(lines.Take(4)); //take only the first four lines
sb2.AppendLine(lines.Skip(4).TakeWhile(s => !String.IsNullOrEmpty(s))); //skip the first four lines, take everything else
}
else if(fileName.StartsWith("Load_", StringComparison.InvariantCultureIgnoreCase)
{
sb2.AppendLine(lines.Skip(1).TakeWhile(s => !String.IsNullOrEmpty(s)));
}
}
// now write your StringBuilder objects to file...
}
protected virtual void WriteFile(StringBuilder sb1, StringBuilder sb2)
{
// ... etc..
}
}
Related
How can I split a text file that contains ASCII code SOH and ETX into multiple files?
For exmaple the text file I have named 001234.txt contains the following content:
SOH{ABCDXZY}ETX
SOH{ABCDXZY}ETX
SOH{ABCDXZY}ETX
I would like to split the single text file into multiple text files for each ASCII code that starts with SOH and ends with ETX.
The single text file name should be splitted into 101234.txt , 111234.txt..etc and each contains a single content that starts with SOH and ends with ETX.
I appreciate any help.
using System.IO;
using System.Linq;
namespace ASCII_Split
{
class Program
{
static void Main(string[] args)
{
var txt = "";
const char soh = (char)1;
const char eox = (char)3;
var count = 1;
var pathToFile = #"C:\Temp\00599060.txt";
using (var sr = new StreamReader(pathToFile))
txt = sr.ReadToEnd();
while (txt.Contains(soh))
{
var outfil = Path.Combine(Path.GetDirectoryName(pathToFile), count.ToString("000"), "_fix.txt");
var eInd = txt.IndexOf(eox);
using (var sw = new StreamWriter(outfil, false))
{
sw.Write(txt.Substring(1, eInd - 1));
}
txt = txt.Substring(eInd + 1);
count++;
}
}
}
}
This should more or less do the trick:
//Read all text from file into a string
var fileContent = File.ReadAllText("001234.txt");
//split text into array according to a Regex pattern
var pattern = #"SOH*ETX";
var splitContent = Regex.Split(fileContent, pattern);
//counter for file names
var counter = 10;
foreach(var content in splitContent)
{
//create file and use stream to write to it
using (var stream = File.Create($"{counter++}1234.txt"))
{
var contentAsBytes = new UTF8Encoding(true).GetBytes(content);
stream.Write(contentAsBytes, 0, contentAsBytes.Length);
}
}
Provided by SOH and ETX you mean the respective control characters, this here should get you on your way:
var txt = "";
const char soh = (char) 1;
const char eox = (char) 3;
var count = 1;
var pathToFile = #"C:\00_Projects_temp\test.txt";
using (var sr = new StreamReader(pathToFile))
txt = sr.ReadToEnd();
while (txt.Contains(soh))
{
var outfil = Path.Combine(Path.GetDirectoryName(pathToFile), count.ToString("000"), "_test.txt");
var eInd = txt.IndexOf(eox);
using (var sw = new StreamWriter(outfil, false))
{
sw.Write(txt.Substring(1, eInd - 1));
}
txt = txt.Substring(eInd + 1);
count++;
}
Thank you LocEngineer the program works, I did little change to concatonate the filename with the counter using "+" instead of ",".
using System.IO;
using System.Linq;
namespace ASCII_Split
{
class Program
{
static void Main(string[] args)
{
var txt = "";
const char soh = (char)1;
const char eox = (char)3;
var count = 1;
var pathToFile = #"C:\Temp\00599060.txt";
using (var sr = new StreamReader (pathToFile))
txt = sr.ReadToEnd();
if (txt.IndexOf(soh) != txt.LastIndexOf(soh))
{
while (txt.Contains(soh))
{
var outfil = Path.Combine(Path.GetDirectoryName(pathToFile), count.ToString("00") + Path.GetFileName(pathToFile));
var eInd = txt.IndexOf(eox);
using (var sw = new StreamWriter(outfil, false))
{
sw.Write(txt.Substring(1, eInd - 1));
}
txt = txt.Substring(eInd + 1);
count++;
}
File.Move((pathToFile), (pathToFile) + ".org");
}
}
}
}
I have a csv file, and I need to add a unique ID based on the first two characters of the file. I have the following code:
using (StreamReader sr = new StreamReader(f))
{
string currentLine;
int id = 0;
while ((currentLine = sr.ReadLine()) != null)
{
string row = currentLine.ToString();
string FirstTwoCharacters = currentLine.Substring(0, 2);
if (FirstTwoCharacters == "01")
{
id = id + 1;
row += "*" + id.ToString();
using (StreamWriter files = File.AppendText(dir + newfilename))
{
files.WriteLine(row);
}
}
else
{
row += "*" + id.ToString();
using (StreamWriter files = File.AppendText(dir + newfilename))
{
files.WriteLine(row);
}
}
}
}
The csv files can be huge, 1Gb in size, around 6 million rows. Just wanted advice, if there is a quicker way to handling this, as it currently can take 3+ hours to process a file, and multiple files can be received in one go.
Instead of opening new file for appending line for each line of input file you can keep stream writer opened:
using (StreamReader sr = new StreamReader(f))
using (StreamWriter files = File.AppendText(dir + newfilename))
{
string currentLine;
int id = 0;
while ((currentLine = sr.ReadLine()) != null)
{
string firstTwoCharacters = currentLine.Substring(0, 2);
if (firstTwoCharacters == "01")
id++;
files.WriteLine(currentLine + "*" + id);
}
}
You can also use File.ReadLines to enumerate source lines:
using (StreamWriter writer = File.AppendText(dir + newfilename))
{
int id = 0;
foreach(var line in File.ReadLines(f))
{
if (line.Substring(0,2) == "01")
id++;
writer.WriteLine($"{line}*{id}");
}
}
Or even LINQ approach
int id = 0;
var newLines = from line in File.ReadLines(f)
let incrementId = line.Substring(0,2) == "01"
select $"{line}*{incrementId ? (++id) : id}";
File.WriteAllLines(dir + newfilename, newLines);
opening the (File.AppendText) inside the while loop is costly, move this to outside the while
using (StreamReader sr = new StreamReader(f))
{
string currentLine;
int id = 0;
using (StreamWriter files = File.AppendText(dir + newfilename))
{
while ((currentLine = sr.ReadLine()) != null)
{
string row = currentLine.ToString();
string FirstTwoCharacters = currentLine.Substring(0, 2);
if (FirstTwoCharacters == "01")
{
id = id + 1;
row += "*" + id.ToString();
files.WriteLine(row);
}
else
{
row += "*" + id.ToString();
files.WriteLine(row);
}
}
}
}
I have following code:
using System;
using System.Collections.Generic;
using System.IO;
using VirusTotalNET;
using VirusTotalNET.Objects;
using System.Linq;
using System.Security.Permissions;
namespace VirusTotalNETClient
{
class Program
{
private const string ScanUrl = "http://www.google.com/";
static void Main(string[] args)
{
VirusTotal virusTotal = new VirusTotal("5d8684f50946c2bdeaf5c4fd966f61f3661de808e9d7324b99788d6f4fb7ad57");
//Use HTTPS instead of HTTP
virusTotal.UseTLS = true;
//creating folder for programs reliqies and output log
string folderName = "C:\\OnlineScanner";
System.IO.Directory.CreateDirectory(folderName);
//get list of files to analyse
var paths = Traverse("C:\test");
File.WriteAllLines("C:\\OnlineScanner\\test.txt", paths);
foreach (string line in File.ReadLines("C:\\test.txt"))
{
//Define what file you want to analyse
FileInfo fileInfo = new FileInfo(line);
//Check if the file has been scanned before.
FileReport fileReport = virusTotal.GetFileReport(fileInfo);
bool hasFileBeenScannedBefore = fileReport.ResponseCode == ReportResponseCode.Present;
//If the file has been scanned before, the results are embedded inside the report.
if (hasFileBeenScannedBefore)
{
int detekce = fileReport.Positives;
if (detekce >= 1)
{
using (var writer = new StreamWriter("C:\\OnlineScanner\\OnlineScannerLog.txt"))
{
writer.WriteLine(line);
writer.WriteLine("URL to test: " + fileReport.Permalink);
writer.WriteLine("Detect ratio: " + fileReport.Positives + "/54");
writer.WriteLine("Message: " + fileReport.VerboseMsg);
writer.WriteLine();
writer.WriteLine();
}
}
System.Threading.Thread.Sleep(16000);
}
else
{
ScanResult fileResult = virusTotal.ScanFile(fileInfo);
int detekce = fileReport.Positives;
if (detekce >= 1)
{
using (var writer = new StreamWriter("C:\\OnlineScanner\\OnlineScannerLog.txt"))
{
writer.WriteLine(line);
writer.WriteLine("URL to test: " + fileReport.Permalink);
writer.WriteLine("Detect ratio: " + fileReport.Positives + "/54");
writer.WriteLine("Message: " + fileReport.VerboseMsg);
writer.WriteLine();
writer.WriteLine();
}
}
System.Threading.Thread.Sleep(16000);
}
}
}
private static IEnumerable<string> Traverse(string rootDirectory)
{
IEnumerable<string> files = Enumerable.Empty<string>();
IEnumerable<string> directories = Enumerable.Empty<string>();
try
{
// The test for UnauthorizedAccessException.
var permission = new FileIOPermission(FileIOPermissionAccess.PathDiscovery, rootDirectory);
permission.Demand();
files = Directory.GetFiles(rootDirectory);
directories = Directory.GetDirectories(rootDirectory);
}
catch
{
// Ignore folder (access denied).
rootDirectory = null;
}
foreach (var file in files)
{
yield return file;
}
// Recursive call for SelectMany.
var subdirectoryItems = directories.SelectMany(Traverse);
foreach (var result in subdirectoryItems)
{
yield return result;
}
}
}
}
This code run some time (arround 15secs) but then program crashs.
The error is
System.IO.IOException, process can't access to file C:\hiberfil.sys.
http://upnisito.cz/images/2016_12/319crasherrror.png
Do you have any idea how to solve it?
I have a txt file with data such as the following:
:10FF800000040B4E00040B4E00047D1400047D148D
:10FF900000040B4E0004CF6200040B4E00040B4E15
:10FFA00000040B4E00040B4E00040B4E00040B4EDD
:10FFB00000047D1400047D1400047D1400047D14ED
:10FFC00000040B4E000000000000000000000000D4
:10FFD0000000000000040B4E0000000000000000C4
:10FFE0000000000000000000000000000000000011
:10FFF0000000000000000000060000000000BFF844
:020000020000FC
:020000040014E6
:043FF0005AC8A58C7A
:00000001FF
what I want to do with my C# program is to add a line after or before a specific line, lets say add the line:
:020000098723060
before this line:
:020000020000FC
I have tried using the File.ReadLines("file.txt").Last(); but that just gives me the last one, what if i want the third or fourth? also, is there any way to identify the ":" in the file?
The simplest way - if you're happy to read the whole file into memory - would be just:
public void InsertLineBefore(string file, string lineToFind, string lineToInsert)
{
List<string> lines = File.ReadLines(file).ToList();
int index = lines.IndexOf(lineToFind);
// TODO: Validation (if index is -1, we couldn't find it)
lines.Insert(index, lineToInsert);
File.WriteAllLines(file, lines);
}
public void InsertLineAfter(string file, string lineToFind, string lineToInsert)
{
List<string> lines = File.ReadLines(file).ToList();
int index = lines.IndexOf(lineToFind);
// TODO: Validation (if index is -1, we couldn't find it)
lines.Insert(index + 1, lineToInsert);
File.WriteAllLines(file, lines);
}
There are significantly more efficient ways of doing this, but this approach is really simple.
A brute force approach
string[] lines = File.ReadAllLines("file.txt");
using(StreamWrite sw = new StreamWriter("file.txt"))
{
foreach(string line in lines)
{
if(line == ":020000020000FC")
sw.WriteLine(":020000098723060");
sw.WriteLine(line);
}
}
I would say it is better to read and write line by line, especially if the target file tend to be of large size:
using (StreamReader r = new StreamReader("Test.txt"))
{
using (StreamWriter w = new StreamWriter("TestOut.txt"))
{
while (!r.EndOfStream)
{
string line = r.ReadLine();
w.WriteLine(line);
if (line == ":020000020000FC")
w.WriteLine(":020000098723060");
}
w.Close();
r.Close();
}
}
Not sure if you're trying to avoid reading the entire file in due to size, etc...but can't you just read the file and then replace...e.g.
var text = readFile(somePath);
writeFile( text.replace(":020000020000FC\n",":020000098723060\n:020000020000FC\n") , somePath);
Here is a solution, though it may not be the best, it does work:
public void AddTextToFile(string filePath, int lineNumber, string txt) //zero based lineNumber
{
Collection<string> newLines = new Collection<string>(File.ReadAllLines(filePath).ToList());
if (lineNumber < newLines.Count)
newLines.Insert(lineNumber, txt);
else
newLines.Add(txt);
using (StreamWriter writer = new StreamWriter(filePath, false))
{
foreach (string s in newLines)
writer.WriteLine(s);
}
}
And to answer your question about determining if ":" exists in a string, the answer is yes, in the example above, you could check if the line contains it by...
if(newLines[idx].Contains(':'))
//do something
The ":" character doesn't really help the implementation, the lines are all newline-delimited already.
Here's an attempt at a method that doesn't load it all to memory or output to a different file.
Never cross the streams.
static Int32 GetCharPos(StreamReader s)
{
var ia = BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.GetField;
Int32 charpos = (Int32)s.GetType().InvokeMember("charPos", ia, null, s, null);
Int32 charlen = (Int32)s.GetType().InvokeMember("charLen", ia, null, s, null);
return (Int32)s.BaseStream.Position - charlen + charpos;
}
static void Appsert(string data, string precedingEntry = null)
{
if (precedingEntry == null)
{
using (var filestream = new FileStream(dataPath, FileMode.Append))
using (var tw = new StreamWriter(filestream))
{
tw.WriteLine(data);
return;
}
}
int seekPos = -1;
using (var readstream = new FileStream(dataPath,
FileMode.Open, FileAccess.Read, FileShare.Write))
using (var writestream = new FileStream(dataPath,
FileMode.Open, FileAccess.Write, FileShare.Read))
using (var tr = new StreamReader(readstream))
{
while (seekPos == -1)
{
var line = tr.ReadLine();
if (line == precedingEntry)
seekPos = GetCharPos(tr);
else if (tr.EndOfStream)
seekPos = (int)readstream.Length;
}
writestream.Seek(seekPos, SeekOrigin.Begin);
readstream.Seek(seekPos, SeekOrigin.Begin);
int readLength = 0;
var readBuffer = new byte[4096];
var writeBuffer = new byte[4096];
var writeData = tr.CurrentEncoding.GetBytes(data + Environment.NewLine);
int writeLength = writeData.Length;
writeData.CopyTo(writeBuffer, 0);
while (true & writeLength > 0)
{
readLength = readstream.Read(readBuffer, 0, readBuffer.Length);
writestream.Write(writeBuffer, 0, writeLength);
var tmp = writeBuffer;
writeBuffer = readBuffer;
writeLength = readLength;
readBuffer = tmp;
}
}
}
I'm having some trouble integrating two pieces of code. The first checks the size of a file and the next one loops trough a SQL database and looks for a matching name for a file. I basically want to check if it's a new file or if the file has changed since I logged some of it's data last time.
This gets the size of each file in the directory
// Make a reference to a directory.
DirectoryInfo di = new DirectoryInfo("C:\\Users");
// Get a reference to each file in that directory.
FileInfo[] fiArr = di.GetFiles();
// Display the names and sizes of the files.
MessageBox.Show("The directory {0} contains the following files:", di.Name);
foreach (FileInfo f in fiArr)
MessageBox.Show("The size of" + f.Name + " is " + f.Length + " bytes.");
This code loops untill it finds a mach or untill all entries has been looked trough.
try
{
// LINQ query for all files containing the word '.txt'.
var files = from file in
Directory.EnumerateFiles("C:\\Users")
where file.ToLower().Contains(".txt")
select file;
foreach (var file in files)
{
//Get path to HH file
filename = System.IO.Path.GetFileName(file);
tempString = "";
//Keep looking trough database utill database empty or HH found
while (inc != numberOfSessions && (filename != tempString))
{
sessionRow = sessions.Tables["Sessions"].Rows[inc];
tempString = sessionRow.ItemArray.GetValue(1).ToString();
inc++;
}
Lets say ItemAttay.GetValue(2) returns the saved size of a file. How can i most efficiently keep the while loop going if
inc != numberOfSessions && (filename != tempString) && (sessionRow.ItemArray.GetValue(2) == f.length)
Thanks for having a look!
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Data;
class Program
{
static void Main(string[] args)
{
var files1 = new List<string>(Directory.GetFiles(args[0],
"*.txt",
SearchOption.AllDirectories));
List<FileData> ListFiles = new List<FileData>();
for (int i = 0; i < files1.Count; i++)
{
FileInfo file = new FileInfo(files1[i]);
FileData _tmpfile = new FileData(file.Name.ToString(), file.Length,
File.GetLastWriteTime(files1[1]).ToString("yyyy-MM-dd H:mm:ss"),
File.GetLastAccessTime(files1[1]).ToString("yyyy-MM-dd H:mm:ss"));
ListFiles.Add(_tmpfile);
}
DataSet sessions = new DataSet();
DataTable dt = sessions.Tables["Sessions"];
for (int i = 0; i < ListFiles.Count; i++)
{
//compares every file in folder to database
FileData _tmp = ListFiles[i];
for (int j = 0; j < dt.Rows.Count; j++)
{
if (_tmp.GSFileName == dt.Rows[i][0].ToString())
{
//put some code here
break;
}
if (_tmp.GSSize == long.Parse(dt.Rows[i][1].ToString()))
{
//put some code here
break;
}
}
}
}
}
public class FileData
{
string FileName = "";
public string GSFileName
{
get { return FileName; }
set { FileName = value; }
}
long Size = 0;
public long GSSize
{
get { return Size; }
set { Size = value; }
}
string DateOfModification = "";
public string GSDateOfModification
{
get { return DateOfModification; }
set { DateOfModification = value; }
}
string DateOfLastAccess = "";
public string GSDateOfLastAccess
{
get { return DateOfLastAccess; }
set { DateOfLastAccess = value; }
}
public FileData(string fn, long si, string dateofmod, string dateofacc)
{
FileName = fn;
Size = si;
DateOfModification = dateofmod;
DateOfLastAccess = dateofacc;
}
}