I am reading the IMDB movies listing from a text file on my hard drive (originally available from the IMDB site at ftp://ftp.fu-berlin.de/pub/misc/movies/database/movies.list.gz).
It takes around 5 minutes on my machine (basic info: Win7 x64, 16GB RAM, 500GB SATA hard disk at 7200 RPM) to read this file line by line using the code below.
I have two questions:
Is there any way I can optimize code to improve the read time?
Data access doesn't need to be sequential: I don't mind reading the data top to bottom, bottom to top, or in any other order, as long as it is read one line at a time. Is there a way to read in multiple directions at once to improve the read time?
The application is a Windows Console Application.
Update: Many responses correctly pointed out that writing to the console takes substantial time. Given that, consider displaying the data on the Windows console desirable but not mandatory.
//Code Block
string file = @"D:\movies.list";
FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
using (StreamReader sr = new StreamReader(fs))
{
while (sr.Peek() >= 0)
{
Console.WriteLine(sr.ReadLine());
}
}
I'm not certain whether this is more efficient or not, but an alternate method would be to use File.ReadAllLines:
var movieFile = File.ReadAllLines(file);
foreach (var movie in movieFile)
Console.WriteLine(movie);
I am not a C# developer, but how about doing a one-time bulk insert of the file into a database? Then you can reuse the data, and export it as well.
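Sketched concretely (hedged: the connection string, table, and column names are assumptions, and SqlBulkCopy is just one way to do a bulk insert into SQL Server):
// Sketch: one-time bulk load of the list into a SQL Server table.
// For a file this large you would batch the rows rather than buffer them all.
using System.Data;
using System.Data.SqlClient;
using System.IO;
var table = new DataTable();
table.Columns.Add("Line", typeof(string));
foreach (var line in File.ReadLines(@"D:\movies.list"))
    table.Rows.Add(line);
using (var conn = new SqlConnection(@"Server=.;Database=Imdb;Integrated Security=true"))
using (var bulk = new SqlBulkCopy(conn))
{
    conn.Open();
    bulk.DestinationTableName = "Movies";
    bulk.WriteToServer(table);
}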
In .NET 4 you can use File.ReadLines for lazy evaluation, and thus lower RAM usage, when working on large files.
You can run LINQ operations directly on files; this, along with File.ReadLines, improves load time.
For a better understanding you can check: Read text file word-by-word using LINQ
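For example, a minimal sketch (the year filter is only an illustration):
// Sketch: File.ReadLines streams lines lazily, so the whole file is never
// held in memory; LINQ composes directly on top of it.
using System;
using System.IO;
using System.Linq;
var movies2011 = File.ReadLines(@"D:\movies.list")
    .Where(line => line.Contains("(2011)"));
foreach (var line in movies2011)
    Console.WriteLine(line);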
You can also compare the approaches by timing them.
However, if you are making a web app, you can read the whole file on the application start event and cache it in application state for better performance.
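A hedged sketch of that caching idea in Global.asax (the App_Data path and the "movies" key are assumptions):
// Sketch: read the file once at application start and cache it in
// application state (ASP.NET Global.asax).
protected void Application_Start(object sender, EventArgs e)
{
    Application["movies"] = System.IO.File.ReadAllLines(
        Server.MapPath("~/App_Data/movies.list"));
}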
First of all, if you don't care about printing out the list to the console, please edit your question.
Second, I created a timing program to test the speeds of the different methods suggested:
class Program
{
private static readonly string file = @"movies.list";
private static readonly int testStart = 1;
private static readonly int numOfTests = 7;
private static readonly int MinTimingVal = 1000;
private static string[] testNames = new string[] {
"Naive",
"OneCallToWrite",
"SomeCallsToWrite",
"InParallel",
"InParallelBlcoks",
"IceManMinds",
"TestTiming"
};
private static double[] avgSecs = new double[numOfTests];
private static int[] testIterations = new int[numOfTests];
public static void Main(string[] args)
{
Console.WriteLine("Starting tests...");
Debug.WriteLine("Starting tests...");
Console.WriteLine("");
Debug.WriteLine("");
//*****************************
//The console is the bottle-neck, so we can
//speed-up redrawing it by only showing 1 line at a time.
Console.WindowHeight = 1;
Console.WindowWidth = 50;
Console.BufferHeight = 100;
Console.BufferWidth = 50;
//******************************
Action[] actionArray = new Action[numOfTests];
actionArray[0] = naive;
actionArray[1] = oneCallToWrite;
actionArray[2] = someCallsToWrite;
actionArray[3] = inParallel;
actionArray[4] = inParallelBlocks;
actionArray[5] = iceManMinds;
actionArray[6] = testTiming;
for (int i = testStart; i < actionArray.Length; i++)
{
Action a = actionArray[i];
DoTiming(a, i);
}
printResults();
Console.WriteLine("");
Debug.WriteLine("");
Console.WriteLine("Tests complete.");
Debug.WriteLine("Tests complete.");
Console.WriteLine("Press Enter to Close Console...");
Debug.WriteLine("Press Enter to Close Console...");
Console.ReadLine();
}
private static void DoTiming(Action a, int num)
{
a.Invoke();
Stopwatch watch = new Stopwatch();
Stopwatch loopWatch = new Stopwatch();
bool shouldRetry = false;
int numOfIterations = 2;
do
{
watch.Start();
for (int i = 0; i < numOfIterations; i++)
{
a.Invoke();
}
watch.Stop();
shouldRetry = false;
if (watch.ElapsedMilliseconds < MinTimingVal) //if the time was less than the minimum, increase load and re-time.
{
shouldRetry = true;
numOfIterations *= 2;
watch.Reset();
}
} while (shouldRetry);
long totalTime = watch.ElapsedMilliseconds;
double avgTime = ((double)totalTime) / (double)numOfIterations;
avgSecs[num] = avgTime / 1000.00;
testIterations[num] = numOfIterations;
}
private static void printResults()
{
Console.WriteLine("");
Debug.WriteLine("");
for (int i = testStart; i < numOfTests; i++)
{
TimeSpan t = TimeSpan.FromSeconds(avgSecs[i]);
Console.WriteLine("ElapsedTime: {0:N4}, " + "test: " + testNames[i], t.ToString() );
Debug.WriteLine("ElapsedTime: {0:N4}, " + "test: " + testNames[i], t.ToString() );
}
}
public static void naive()
{
FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
using (StreamReader sr = new StreamReader(fs))
{
while (sr.Peek() >= 0)
{
Console.WriteLine( sr.ReadLine() );
}
}
}
public static void oneCallToWrite()
{
FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
using (StreamReader sr = new StreamReader(fs))
{
StringBuilder sb = new StringBuilder();
while (sr.Peek() >= 0)
{
string s = sr.ReadLine();
sb.Append("\n" + s);
}
Console.Write(sb);
}
}
public static void someCallsToWrite()
{
FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
using (StreamReader sr = new StreamReader(fs))
{
StringBuilder sb = new StringBuilder();
int count = 0;
int mod = 10000;
while (sr.Peek() >= 0)
{
count++;
string s = sr.ReadLine();
sb.Append("\n" + s);
if (count % mod == 0)
{
Console.Write(sb);
sb = new StringBuilder();
}
}
Console.Write( sb );
}
}
public static void inParallel()
{
string[] wordsFromFile = File.ReadAllLines( file );
int length = wordsFromFile.Length;
Parallel.For( 0, length, i => {
Console.WriteLine( wordsFromFile[i] );
});
}
public static void inParallelBlocks()
{
string[] wordsFromFile = File.ReadAllLines(file);
int length = wordsFromFile.Length;
Parallel.For<StringBuilder>(0, length,
() => { return new StringBuilder(); },
(i, loopState, sb) =>
{
sb.Append("\n" + wordsFromFile[i]);
return sb;
},
(x) => { Console.Write(x); }
);
}
#region iceManMinds
public static void iceManMinds()
{
string FileName = file;
long ThreadReadBlockSize = 50000;
int NumberOfThreads = 4;
byte[] _inputString;
var fi = new FileInfo(FileName);
long totalBytesRead = 0;
long fileLength = fi.Length;
long readPosition = 0L;
Console.WriteLine("Reading Lines From {0}", FileName);
var threads = new Thread[NumberOfThreads];
var instances = new ReadThread[NumberOfThreads];
_inputString = new byte[fileLength];
while (totalBytesRead < fileLength)
{
for (int i = 0; i < NumberOfThreads; i++)
{
var rt = new ReadThread { StartPosition = readPosition, BlockSize = ThreadReadBlockSize };
instances[i] = rt;
threads[i] = new Thread(rt.Read);
threads[i].Start();
readPosition += ThreadReadBlockSize;
}
for (int i = 0; i < NumberOfThreads; i++)
{
threads[i].Join();
}
for (int i = 0; i < NumberOfThreads; i++)
{
if (instances[i].BlockSize > 0)
{
Array.Copy(instances[i].Output, 0L, _inputString, instances[i].StartPosition,
instances[i].BlockSize);
totalBytesRead += instances[i].BlockSize;
}
}
}
string finalString = Encoding.ASCII.GetString(_inputString);
Console.WriteLine(finalString);//.Substring(104250000, 50000));
}
private class ReadThread
{
public long StartPosition { get; set; }
public long BlockSize { get; set; }
public byte[] Output { get; private set; }
public void Read()
{
Output = new byte[BlockSize];
var inStream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
inStream.Seek(StartPosition, SeekOrigin.Begin);
BlockSize = inStream.Read(Output, 0, (int)BlockSize);
inStream.Close();
}
}
#endregion
public static void testTiming()
{
Thread.Sleep(500);
}
}
Each of these tests prints the file out to the console.
When run under default Console settings, each test took between 5:30 and 6:10 (min:sec).
After adjusting the Console properties, namely setting Console.WindowHeight = 1 so that only 1 line is shown at a time (you can scroll up and down to see the most recent 100 lines), I achieved a speed-up.
Currently, the task completes in just a little over 2:40 (min:sec) for most methods.
Try it out on your computer and see how it works for you.
Interestingly enough, the different methods were basically equivalent, with the OP's code being basically the fastest.
The timing code warms up the code, then runs it twice and averages the time taken; it does this for each method.
Feel free to try out your own methods and time them.
The answer to this question really depends on what it is you will be doing with the data. If your intention truly is to just read in the file and dump the contents to the console screen, then it would be better to use the StringBuilder class to build up a string of, say, 1000 lines, dump that to the screen, reset the builder, read in another 1000 lines, dump them, and so on.
However, if you are trying to build something that is part of a larger project and you are using .NET 4.0, you can use the MemoryMappedFile class to read the file and call CreateViewAccessor to create a "window" that operates on just a portion of the data instead of reading in the entire file.
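For illustration, a minimal sketch of that windowed access (the path, the zero offset, and the 4 KB window size are arbitrary assumptions; the window must not extend past the end of the file):
// Sketch: map the file and read only a 4 KB window of it (.NET 4).
using System;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Text;
using (var mmf = MemoryMappedFile.CreateFromFile(@"D:\movies.list", FileMode.Open))
using (var view = mmf.CreateViewAccessor(0, 4096)) // offset 0, 4 KB window
{
    var buffer = new byte[4096];
    view.ReadArray(0, buffer, 0, buffer.Length);
    Console.WriteLine(Encoding.ASCII.GetString(buffer));
}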
Another option would be to create threads that read different parts of the file all at once, then put it all together at the end.
If you can be more specific as to what you plan to do with this data, I can help you more. Hope this helps!
EDIT:
Try this code out, man. I was able to read the whole list in literally 3 seconds using threads:
using System;
using System.IO;
using System.Text;
using System.Threading;
namespace ConsoleApplication36
{
class Program
{
private const string FileName = @"C:\Users\Public\movies.list";
private const long ThreadReadBlockSize = 50000;
private const int NumberOfThreads = 4;
private static byte[] _inputString;
static void Main(string[] args)
{
var fi = new FileInfo(FileName);
long totalBytesRead = 0;
long fileLength = fi.Length;
long readPosition = 0L;
Console.WriteLine("Reading Lines From {0}", FileName);
var threads = new Thread[NumberOfThreads];
var instances = new ReadThread[NumberOfThreads];
_inputString = new byte[fileLength];
while (totalBytesRead < fileLength)
{
for (int i = 0; i < NumberOfThreads; i++)
{
var rt = new ReadThread { StartPosition = readPosition, BlockSize = ThreadReadBlockSize };
instances[i] = rt;
threads[i] = new Thread(rt.Read);
threads[i].Start();
readPosition += ThreadReadBlockSize;
}
for (int i = 0; i < NumberOfThreads; i++)
{
threads[i].Join();
}
for (int i = 0; i < NumberOfThreads; i++)
{
if (instances[i].BlockSize > 0)
{
Array.Copy(instances[i].Output, 0L, _inputString, instances[i].StartPosition,
instances[i].BlockSize);
totalBytesRead += instances[i].BlockSize;
}
}
}
string finalString = Encoding.ASCII.GetString(_inputString);
Console.WriteLine(finalString.Substring(104250000, 50000));
}
private class ReadThread
{
public long StartPosition { get; set; }
public long BlockSize { get; set; }
public byte[] Output { get; private set; }
public void Read()
{
Output = new byte[BlockSize];
var inStream = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
inStream.Seek(StartPosition, SeekOrigin.Begin);
BlockSize = inStream.Read(Output, 0, (int)BlockSize);
inStream.Close();
}
}
}
}
You will need to change the FileName to match the location of your movies.list file. Also, you can adjust the total number of threads. I used 4, but you can decrease or increase this at will. You can also change the block size; this is how much data each thread reads in. Also, I'm assuming it's an ASCII text file. If it's not, you need to change the encoding type to UTF8 or whatever encoding the file is in. Good luck!
Related
So, I am trying to synchronize two progress bars like this:
long TotalSize; //Total size of files in Directory "Source"
foreach (string file in Directory.GetFiles("Source", "*.*", SearchOption.AllDirectories))
{
long FileSize; //Current file size in recursive search
long CurrentFileSize; //"+=": meant to accumulate the size of each file in the recursive search
using (FileStream readfile = new FileStream(file, FileMode.Open, FileAccess.Read))
{
byte[] buffer = new byte[1024 * 1024];
int Readbyte;
long TotalByteSize = 0;
while ((Readbyte = readfile.Read(buffer, 0, buffer.Length)) > 0)
{
TotalByteSize += Readbyte;
CurrentFileProgress.Value = (int)((float)TotalByteSize / FileSize * 100.0f); //Progress when reading current file bytes
}
}
OverallFilesProgress.Value = (int)((float)CurrentFileSize/ TotalSize * 100.0f); //Overall progress when read all files
}
This works, but it's not synchronization; it runs the progress bars in parallel. Can you show me an example of how to really synchronize two progress bars (current and overall)? In my head, I understand that I need to somehow save the bytes read from the current file into a variable and then add this to the overall progress, but in practice I can't do it. A second problem is that the overall progress bar starts 1 second late (because it waits for the while loop). P.S. Only FileStream, no BackgroundWorker.
It looks like you have some issues with your counters; it should probably look something like:
int overallTotal = ...;
int overallCompleted = 0;
foreach(var file in files){
// open file etc
int currentCompleted = 0;
int currentTotal = file.Size;
while (...){
overallCompleted += noReadBytes;
currentCompleted += noReadBytes;
CurrentFileProgress.Value = (int)(currentCompleted * 100.0f/ currentTotal );
OverallFilesProgress.Value = (int)(overallCompleted * 100.0f/ overallTotal );
}
}
That should ensure both progress bars are updated continuously. Note that you should probably do the reading on a background thread and use Progress<T>.Report to report progress. Or use asynchronous reads to avoid blocking the UI thread.
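For illustration, a minimal hedged sketch of that Progress&lt;T&gt; suggestion (the wrapper method and everything except the two progress bar names from the question are assumptions):
// Sketch: read on a thread-pool thread, report percentages back to the UI.
// Requires: using System.IO; using System.Linq; using System.Threading.Tasks;
private async Task CopyWithProgressAsync(string[] files)
{
    long totalSize = files.Sum(f => new FileInfo(f).Length);
    // Progress<T> captures the UI SynchronizationContext, so the callbacks
    // run on the UI thread even though Report is called from Task.Run.
    IProgress<int> overall = new Progress<int>(p => OverallFilesProgress.Value = p);
    IProgress<int> current = new Progress<int>(p => CurrentFileProgress.Value = p);
    long overallCompleted = 0;
    await Task.Run(() =>
    {
        var buffer = new byte[1024 * 1024];
        foreach (var file in files)
        {
            long currentTotal = new FileInfo(file).Length;
            long currentCompleted = 0;
            using (var fs = new FileStream(file, FileMode.Open, FileAccess.Read))
            {
                int read;
                while ((read = fs.Read(buffer, 0, buffer.Length)) > 0)
                {
                    currentCompleted += read;
                    overallCompleted += read;
                    current.Report((int)(currentCompleted * 100 / currentTotal));
                    overall.Report((int)(overallCompleted * 100 / totalSize));
                }
            }
        }
    });
}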
One of many ways is a for loop over the count of the Directory.GetFiles() result, then using the FileStream.ReadAsync() method to read (for example) 1 MB chunks without blocking the UI. The ProgressBar updates will be on the UI thread this way, as they should be.
const int ONE_MB = 0x100000;
private async Task loadFilesAsync(string selectedPath)
{
string[] files = Directory.GetFiles(selectedPath, "*.*", SearchOption.AllDirectories);
for (int count = 0; count < files.Length; count++)
{
progressBarOverall.Value = (int)((count/(float)files.Length) * 100);
string path = files[count];
using (FileStream fileStream = new FileStream(path, FileMode.Open))
{
int offset = 0;
long length = fileStream.Length;
byte[] buffer = new byte[length];
int readLength = ONE_MB;
while (offset != length)
{
if (offset + readLength > length)
{
readLength = (int)length - offset;
}
offset += await fileStream.ReadAsync(buffer, offset, readLength);
progressBarSingle.Value = (int)((offset / (float)length) * 100);
}
}
}
tableLayoutPanelStatus.Visible = false;
}
Testing
public partial class MainForm : Form
{
public MainForm()
{
InitializeComponent();
tsmiBrowseFolder.Click += onClickBrowseFolder;
}
private readonly FolderBrowserDialog _folderBrowser = new FolderBrowserDialog
{
RootFolder= Environment.SpecialFolder.ApplicationData,
};
private void onClickBrowseFolder(object sender, EventArgs e)
{
if(_folderBrowser.ShowDialog() == DialogResult.OK)
{
_ = loadFilesAsync(_folderBrowser.SelectedPath);
}
}
.
.
.
}
I have to write an implementation of an array that stores its values on the hard drive instead of in RAM (I know how stupid it sounds, but it's intended to teach us how different sorting algorithms behave on RAM versus hard drive). This is what I've written so far:
class HDDArray : IEnumerable<int>
{
private string filePath;
public int this[int index]
{
get
{
using (var reader = new StreamReader(filePath))
{
string line = reader.ReadLine();
for (int i = 0; i < index; i++)
{
line = reader.ReadLine();
}
return Convert.ToInt32(line);
}
}
set
{
using (var fs = File.Open(filePath, FileMode.OpenOrCreate, FileAccess.ReadWrite))
{
var reader = new StreamReader(fs);
var writer = new StreamWriter(fs);
for (int i = 0; i < index; i++)
{
reader.ReadLine();
}
writer.WriteLine(value);
writer.Dispose();
}
}
}
public int Length
{
get
{
int length = 0;
using (var reader = new StreamReader(filePath))
{
while (reader.ReadLine() != null)
{
length++;
}
}
return length;
}
}
public HDDArray(string file)
{
filePath = file;
if (File.Exists(file))
File.WriteAllText(file, String.Empty);
else
File.Create(file).Dispose();
}
public IEnumerator<int> GetEnumerator()
{
using (var reader = new StreamReader(filePath))
{
string line;
while ((line = reader.ReadLine()) != null)
{
yield return Convert.ToInt32(line);
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
The problem I'm facing is when trying to edit a line (in the set portion of the indexer): I end up adding a new line instead of editing the old one (it's pretty obvious why, I just can't figure out how to fix it).
Your array is designed to work with integers. Such a class is quite easy to create, because every element has a fixed length of 4 bytes.
class HDDArray : IEnumerable<int>, IDisposable
{
readonly FileStream stream;
readonly BinaryWriter writer;
readonly BinaryReader reader;
public HDDArray(string file)
{
stream = new FileStream(file, FileMode.Create, FileAccess.ReadWrite);
writer = new BinaryWriter(stream);
reader = new BinaryReader(stream);
}
public int this[int index]
{
get
{
stream.Position = index * 4;
return reader.ReadInt32();
}
set
{
stream.Position = index * 4;
writer.Write(value);
}
}
public int Length
{
get
{
return (int)stream.Length / 4;
}
}
public IEnumerator<int> GetEnumerator()
{
stream.Position = 0;
while (reader.PeekChar() != -1)
yield return reader.ReadInt32();
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public void Dispose()
{
reader?.Dispose();
writer?.Dispose();
stream?.Dispose();
}
}
Since the size of each array element is known, we can simply move around the stream by changing its Position property.
BinaryWriter and BinaryReader are very convenient for writing and reading numbers.
Opening a stream is a heavy operation, hence we do it once, when the class is created. At the end of the work you need to clean up after yourself, so I implemented the IDisposable interface.
Usage:
HDDArray arr = new HDDArray("test.dat");
Console.WriteLine("Length: " + arr.Length);
for (int i = 0; i < 10; i++)
arr[i] = i;
Console.WriteLine("Length: " + arr.Length);
foreach (var n in arr)
Console.WriteLine(n);
// Console.WriteLine(arr[20]); // Exception!
arr.Dispose(); // release resources
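Since HDDArray implements IDisposable, a using block is the more idiomatic way to release the stream; a small variation on the usage above:
// Same usage, but Dispose runs automatically, even if an exception is thrown.
using (var arr = new HDDArray("test.dat"))
{
    for (int i = 0; i < 10; i++)
        arr[i] = i;
    foreach (var n in arr)
        Console.WriteLine(n);
}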
I stand to be corrected, but I don't think there is an easy way to rewrite a specific line in place, so you will probably find it easier to rewrite the whole file, modifying that line.
You could change your set code as follows:
set
{
var allLinesInFile = File.ReadAllLines(filePath);
allLinesInFile[index] = value.ToString();
File.WriteAllLines(filePath, allLinesInFile);
}
It goes without saying that there should be some safety checks in there, e.g. that the file exists and that index < allLinesInFile.Length.
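A hedged sketch of the same setter with those checks added (the exception choices are my own):
set
{
    if (!File.Exists(filePath))
        throw new FileNotFoundException("Backing file not found.", filePath);
    var allLinesInFile = File.ReadAllLines(filePath);
    if (index < 0 || index >= allLinesInFile.Length)
        throw new ArgumentOutOfRangeException(nameof(index));
    allLinesInFile[index] = value.ToString();
    File.WriteAllLines(filePath, allLinesInFile);
}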
I think for the sake of a homework assignment on sorting algorithms you needn't bother yourself with memory size issues.
Of course, please add a check that the file exists before reading.
Note: line counting in the example starts from 0.
string[] lines = File.ReadAllLines(filePath);
using (StreamWriter writer = new StreamWriter(filePath))
{
for (int currentLineNmb = 0; currentLineNmb < lines.Length; currentLineNmb++ )
{
if (currentLineNmb == lineToEditNmb)
{
writer.WriteLine(lineToWrite);
continue;
}
writer.WriteLine(lines[currentLineNmb]);
}
}
I am creating a word list of possible character combinations to prove how insecure 8-character passwords are. This code will write aaaaaaaa, then aaaaaaab, then aaaaaaac, etc., until zzzzzzzz:
class Program
{
static string path;
static int file = 0;
static void Main(string[] args)
{
new_file();
var alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789+-*_!$£^=<>§°ÖÄÜöäü.;:,?{}[]";
var q = alphabet.Select(x => x.ToString());
int size = 3;
int counter = 0;
for (int i = 0; i < size - 1; i++)
{
q = q.SelectMany(x => alphabet, (x, y) => x + y);
}
foreach (var item in q)
{
if (counter >= 20000000)
{
new_file();
counter = 0;
}
if (File.Exists(path))
{
using (StreamWriter sw = File.AppendText(path))
{
sw.WriteLine(item);
Console.WriteLine(item);
/*if (!(Regex.IsMatch(item, #"(.)\1")))
{
sw.WriteLine(item);
counter++;
}
else
{
Console.WriteLine(item);
}*/
}
}
else
{
new_file();
}
}
}
static void new_file()
{
path = @"C:\" + "list" + file + ".txt";
if (!File.Exists(path))
{
using (StreamWriter sw = File.CreateText(path))
{
}
}
file++;
}
}
The code is working fine, but it takes weeks to run. Does anyone know a way to speed it up, or do I have to wait? If anyone has an idea, please tell me.
Performance:
size 3: 0.02s
size 4: 1.61s
size 5: 144.76s
Hints:
removed LINQ for combination generation
removed Console.WriteLine for each password
removed StreamWriter
large buffer (128k) for file writing
static void Main()
{
const string alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789+-*_!$£^=<>§°ÖÄÜöäü.;:,?{}[]";
var byteAlphabet = alphabet.Select(ch => (byte)ch).ToArray();
var alphabetLength = alphabet.Length;
var newLine = new[] { (byte)'\r', (byte)'\n' };
const int size = 4;
var number = new byte[size];
var password = Enumerable.Range(0, size).Select(i => byteAlphabet[0]).Concat(newLine).ToArray();
var watcher = new System.Diagnostics.Stopwatch();
watcher.Start();
var isRunning = true;
for (var counter = 0; isRunning; counter++)
{
Console.Write("{0}: ", counter);
Console.Write(password.Select(b => (char)b).ToArray());
using (var file = System.IO.File.Create(string.Format(@"list.{0:D5}.txt", counter), 2 << 16))
{
for (var i = 0; i < 2000000; ++i)
{
file.Write(password, 0, password.Length);
var j = size - 1;
for (; j >= 0; j--)
{
if (number[j] < alphabetLength - 1)
{
password[j] = byteAlphabet[++number[j]];
break;
}
else
{
number[j] = 0;
password[j] = byteAlphabet[0];
}
}
if (j < 0)
{
isRunning = false;
break;
}
}
}
}
watcher.Stop();
Console.WriteLine(watcher.Elapsed);
}
Try the following modified code. In LINQPad it runs in < 1 second. With your original code I gave up after 40 seconds. It removes the overhead of opening and closing the file for every WriteLine operation. You'll need to test and ensure it gives the same results, because I'm not willing to run your original code for 24 hours to ensure the output is the same.
class Program
{
static string path;
static int file = 0;
static void Main(string[] args)
{
new_file();
var alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789+-*_!$£^=<>§°ÖÄÜöäü.;:,?{}[]";
var q = alphabet.Select(x => x.ToString());
int size = 3;
int counter = 0;
for (int i = 0; i < size - 1; i++)
{
q = q.SelectMany(x => alphabet, (x, y) => x + y);
}
StreamWriter sw = File.AppendText(path);
try
{
foreach (var item in q)
{
if (counter >= 20000000)
{
sw.Dispose();
new_file();
sw = File.AppendText(path);
counter = 0;
}
sw.WriteLine(item);
counter++;
Console.WriteLine(item);
}
}
finally
{
if(sw != null)
{
sw.Dispose();
}
}
}
static void new_file()
{
path = @"C:\temp\list" + file + ".txt";
if (!File.Exists(path))
{
using (StreamWriter sw = File.CreateText(path))
{
}
}
file++;
}
}
Your alphabet is missing 0.
With that fixed there would be 90 chars in your set. Let's call it 100 for simplicity. The set you are looking for is all the 8-character strings drawn from that set. There are 100^8 of these, i.e. 10,000,000,000,000,000.
The disk space they will take up depends on how you encode them. Let's be generous: assume you use some 8-bit character set that contains these characters and you don't put in carriage returns, so one byte per char and 8 bytes per string: 10,000,000,000,000,000 strings × 8 bytes ≈ 80 petabytes.
Do you have 80 petabytes of disk (80,000 TB)?
[EDIT] In response to 'this is not an answer':
The original motivation is to create the list? The above shows how large the list would be. It's hard to see what could be DONE with the list if it were actualised, i.e. it would always be quicker to reproduce it than to load it. Surely whatever point could be made by producing the list can also be made by simply knowing its size, which the above shows how to work out.
There are LOTS of inefficiencies in your code, but if your question is 'how can I quickly produce this list and write it to disk', the answer is 'you literally cannot'.
[/EDIT]
I'm new to C#, and I'm trying to use task/async/await for a WinForms GUI. I've read so many tutorials about it, but all of them implement tasks differently. Some use functions, and others just put in the code to execute. Some use Task.Run() or just await. Furthermore, all the examples I've seen are of functions that are included in the UI class. I'm trying to run functions that live in classes used by my UI. I'm just really confused now, and don't know what's right or wrong.
What I'm trying to do is write a file to an EEPROM, using the SpringCard API / PC/SC library. I parse the file into packets and write them to the smart card. I also want to update a status label and progress bar. A lot of things can go wrong. I have flags set in the smart card, and right now I just have a while loop running until it reads a certain flag, which will obviously stall the program if it's forever waiting for a flag.
I guess I'm just confused about how to set it up. Help. I've tried using Tasks. Here is my code so far.
/* Initialize open file dialog */
OpenFileDialog ofd = new OpenFileDialog();
ofd.Multiselect = false;
ofd.Filter = "BIN Files (.bin)|*.bin|HEX Files (.hex)|*.hex";
ofd.InitialDirectory = "C:";
ofd.Title = "Select File";
//Check open file dialog result
if (ofd.ShowDialog() != DialogResult.OK)
{
if (shade != null)
{
shade.Dispose();
shade = null;
}
return;
}
//progform.Show();
Progress<string> progress = new Progress<string>();
file = new ATAC_File(ofd.FileName);
try
{
cardchannel.DisconnectReset();
Task upgrade = upgradeASYNC();
if(cardchannel.Connect())
{
await upgrade;
}
else
{
add_log_text("Connection to the card failed");
MessageBox.Show("Failed to connect to the card in the reader : please check that you don't have another application running in background that tries to work with the smartcards in the same time");
if (shade != null)
{
shade.Dispose();
shade = null;
}
cardchannel = null;
}
}
private async Task upgradeASYNC()
{
int i = 0;
int totalpackets = 0;
add_log_text("Parsing file into packets.");
totalpackets = file.parseFile();
/*progress.Report(new MyTaskProgressReport
{
CurrentProgressAmount = i,
TotalProgressAmount = totalpackets,
CurrentProgressMessage = "Sending upgrade file..."
});*/
ST_EEPROMM24LR64ER chip = new ST_EEPROMM24LR64ER(this, cardchannel, file, EEPROM.DONOTHING);
bool writefile = chip.WriteFileASYNC();
if(writefile)
{
add_log_text("WRITE FILE OK.");
}
else
{
add_log_text("WRITE FILE BAD.");
}
}
In the file class:
public int parseFile()
{
FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read);
BinaryReader br = new BinaryReader(fs);
FileInfo finfo = new FileInfo(filename);
int readbytecount = 0;
int packetcount = 0;
int numofbytesleft = 0;
byte[] hash = new byte[4];
byte[] packetinfo = new byte[4];
byte[] filechunk = null;
/* Read file until all file bytes read */
while (size_int > readbytecount)
{
//Initialize packet array
filechunk = new byte[MAXDATASIZE];
//read into byte array of max write size
if (packetcount < numoffullpackets)
{
//Initialize packet info array
packetinfo[0] = (byte)((size_int + 1) % 0x0100); //packetcountlo
packetinfo[1] = (byte)((size_int + 1) / 0x0100); //packetcounthi
packetinfo[2] = (byte)((packetcount + 1) / 0x0100); //packetcounthi
packetinfo[3] = (byte)((packetcount + 1) % 0x0100); //packetcountlo
//read bytes from file into packet array
bytesread = br.Read(filechunk, 0, MAXDATASIZE);
//add number of bytes read to readbytecount
readbytecount += bytesread;
}
//read EOF into byte array of size smaller than max write size
else if (packetcount == numoffullpackets)
{
//find out how many bytes left to read
numofbytesleft = size_int - (MAXDATASIZE * numoffullpackets);
//Initialize packet info array
packetinfo[0] = (byte)((size_int + 1) / 0x0100); //packetcounthi
packetinfo[1] = (byte)((size_int + 1) % 0x0100); //packetcountlo
packetinfo[2] = (byte)((packetcount + 1) / 0x0100); //packetcounthi
packetinfo[3] = (byte)((packetcount + 1) % 0x0100); //packetcountlo
//Initialize array and add byte padding, MAXWRITESIZE-4 because the other 4 bytes will be added when we append the CRC
//filechunk = new byte[numofbytesleft];
for (int j = 0; j < numofbytesleft; j++)
{
//read byte from file
filechunk[j] = br.ReadByte();
//add number of bytes read to readbytecount
readbytecount++;
}
for (int j = numofbytesleft; j < MAXDATASIZE; j++)
{
filechunk[j] = 0xFF;
}
}
else
{
MessageBox.Show("ERROR");
}
//calculate crc32 on byte array
int i = 0;
foreach (byte b in crc32.ComputeHash(filechunk))
{
hash[i++] = b;
}
//Append hash to filechunk to create new byte array named chunk
byte[] chunk = new byte[MAXWRITESIZE];
Buffer.BlockCopy(packetinfo, 0, chunk, 0, packetinfo.Length);
Buffer.BlockCopy(filechunk, 0, chunk, packetinfo.Length, filechunk.Length);
Buffer.BlockCopy(hash, 0, chunk, (packetinfo.Length + filechunk.Length), hash.Length);
//Add chunk to byte array list
packetcount++;
PacketBYTE.Add(chunk);
}
parseCMD();
return PacketBYTE.Count;
}
In the EEPROM class:
public bool WriteFileASYNC()
{
int blocknum = ATAC_CONSTANTS.RFBN_RFstartwrite;
byte[] response = null;
CAPDU[] EEPROMcmd = null;
int packetCount = 0;
log("ATTEMPT: Read response funct flag.");
do
{
StopRF();
Thread.SpinWait(100);
StartRF();
log("ATTEMPT: Write function flag.");
while (!WriteFlag(ATAC_CONSTANTS.RFBN_functflag, EEPROM.UPLOADAPP)) ;
} while (ReadFunctFlag(ATAC_CONSTANTS.RFBN_responseflag, 0) != EEPROM.UPLOADAPP);
for (int EEPROMcount = 0; EEPROMcount < file.CmdBYTE.Count; EEPROMcount++)
{
string temp = "ATTEMPT: Write EEPROM #" + EEPROMcount.ToString();
log(temp);
EEPROMcmd = file.CmdBYTE[EEPROMcount];
while (EEPROMcmd[blocknum] != null)
{
if (blocknum % 32 == 0)
{
string tempp = "ATTEMPT: Write packet #" + packetCount.ToString();
log("ATTEMPT: Write packet #");
packetCount++;
}
do
{
response = WriteBinaryASYNC(EEPROMcmd[blocknum]);
} while (response == null);
blocknum++;
}
log("ATTEMPT: Write packet flag.");
while (!WriteFlag(ATAC_CONSTANTS.RFBN_packetflag, ATAC_CONSTANTS.RFflag)) ;
log("ATTEMPT: Write packet flag.");
do
{
StopRF();
Thread.SpinWait(300);
StartRF();
} while (!ReadFlag(ATAC_CONSTANTS.RFBN_packetresponseflag, ((blocknum/32) - 1)*(EEPROMcount+1)));
blocknum = ATAC_CONSTANTS.RFBN_RFstartwrite;
}
return true;
}
Tasks are not threads.
When you write this:
Task upgrade = upgradeASYNC();
you are simply invoking upgradeASYNC; it starts executing immediately, on the current thread.
When you write this:
await upgrade;
you are only waiting for the task to finish (before going on to the next instruction).
And this method
private async Task upgradeASYNC()
returns a Task object only because you added the async keyword. But in the body of this method there is no await, so it just runs synchronously from start to finish.
I don't have time to rewrite your code; I'll leave that to another Stack Overflow user. You should learn and work harder ;)
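For illustration only, here is a minimal hedged sketch of how the blocking work could be pushed to the thread pool so the UI stays responsive. The names (file.parseFile, ST_EEPROMM24LR64ER, WriteFileASYNC, add_log_text) come from the question; the Task.Run wrapper is my assumption, and any UI updates made inside those methods would still need marshalling back to the UI thread:
private async Task upgradeASYNC()
{
    add_log_text("Parsing file into packets.");
    // parseFile and WriteFileASYNC block, so run them on a thread-pool
    // thread and await the result; the UI thread stays free meanwhile.
    bool writefile = await Task.Run(() =>
    {
        file.parseFile(); // returns the packet count; ignored here for brevity
        var chip = new ST_EEPROMM24LR64ER(this, cardchannel, file, EEPROM.DONOTHING);
        return chip.WriteFileASYNC();
    });
    add_log_text(writefile ? "WRITE FILE OK." : "WRITE FILE BAD.");
}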
I have a txt file whose format is:
0.32423 1.3453 3.23423
0.12332 3.1231 9.23432432
9.234324234 -1.23432 12.23432
...
Each line has three double values. There are more than 10000 lines in this file. I can use StreamReader.ReadLine and String.Split, then convert the values.
I want to know whether there is any faster method to do it.
StreamReader.ReadLine, String.Split and Double.TryParse sounds like a good solution here.
No need for improvement.
There may be some little micro-optimisations you can perform, but the way you've suggested sounds about as simple as you'll get.
10,000 lines shouldn't take very long - have you tried it and found you've actually got a performance problem? For example, here are two short programs - one creates a 10,000-line file and the other reads it:
CreateFile.cs:
using System;
using System.IO;
public class Test
{
static void Main()
{
Random rng = new Random();
using (TextWriter writer = File.CreateText("test.txt"))
{
for (int i = 0; i < 10000; i++)
{
writer.WriteLine("{0} {1} {2}", rng.NextDouble(),
rng.NextDouble(), rng.NextDouble());
}
}
}
}
ReadFile.cs:
using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
public class Test
{
static void Main()
{
Stopwatch sw = Stopwatch.StartNew();
using (TextReader reader = File.OpenText("test.txt"))
{
string line;
while ((line = reader.ReadLine()) != null)
{
string[] bits = line.Split(' ');
foreach (string bit in bits)
{
double value;
if (!double.TryParse(bit, out value))
{
Console.WriteLine("Bad value");
}
}
}
}
sw.Stop();
Console.WriteLine("Total time: {0}ms",
sw.ElapsedMilliseconds);
}
}
On my netbook (which admittedly has an SSD in it) it only takes 82ms to read the file. I would suggest that's probably not a problem :)
I would suggest reading all your lines at once with
string[] lines = System.IO.File.ReadAllLines(fileName);
This would ensure that the I/O is done with maximum efficiency. You would have to measure (profile), but I would expect the conversions to take far less time.
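A minimal sketch of that approach (CultureInfo.InvariantCulture is my addition, to keep '.' as the decimal separator regardless of locale):
// Sketch: one I/O call, then parse each line into a row of doubles.
using System.Globalization;
using System.IO;
string[] lines = File.ReadAllLines(fileName);
var values = new double[lines.Length][];
for (int i = 0; i < lines.Length; i++)
{
    string[] parts = lines[i].Split(' ');
    values[i] = new double[parts.Length];
    for (int j = 0; j < parts.Length; j++)
        values[i][j] = double.Parse(parts[j], CultureInfo.InvariantCulture);
}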
Your method is already good!
You could improve it by writing a ReadLine-style function that returns an array of doubles, so you can reuse it in other programs.
This solution is a little bit slower (see the benchmarks at the end), but it's nicer to read. It should also be more memory efficient, because only the current character is buffered at a time (instead of the whole file or line).
Reading arrays is an additional feature of this reader, which assumes that the size of the array always comes first as an int value.
IParsable is another feature that makes it easy to implement Parse methods for various types.
class StringStreamReader {
private StreamReader sr;
public StringStreamReader(StreamReader sr) {
this.sr = sr;
this.Separator = ' ';
}
private StringBuilder sb = new StringBuilder();
public string ReadWord() {
eol = false;
sb.Clear();
char c;
while (!sr.EndOfStream) {
c = (char)sr.Read();
if (c == Separator) break;
if (IsNewLine(c)) {
eol = true;
char nextch = (char)sr.Peek();
while (IsNewLine(nextch)) {
sr.Read(); // consume all newlines
nextch = (char)sr.Peek();
}
break;
}
sb.Append(c);
}
return sb.ToString();
}
private bool IsNewLine(char c) {
return c == '\r' || c == '\n';
}
public int ReadInt() {
return int.Parse(ReadWord());
}
public double ReadDouble() {
return double.Parse(ReadWord());
}
public bool EOF {
get { return sr.EndOfStream; }
}
public char Separator { get; set; }
bool eol;
public bool EOL {
get { return eol || sr.EndOfStream; }
}
public T ReadObject<T>() where T : IParsable, new() {
var obj = new T();
obj.Parse(this);
return obj;
}
public int[] ReadIntArray() {
int size = ReadInt();
var a = new int[size];
for (int i = 0; i < size; i++) {
a[i] = ReadInt();
}
return a;
}
public double[] ReadDoubleArray() {
int size = ReadInt();
var a = new double[size];
for (int i = 0; i < size; i++) {
a[i] = ReadDouble();
}
return a;
}
public T[] ReadObjectArray<T>() where T : IParsable, new() {
int size = ReadInt();
var a = new T[size];
for (int i = 0; i < size; i++) {
a[i] = ReadObject<T>();
}
return a;
}
internal void NextLine() {
eol = false;
}
}
interface IParsable {
void Parse(StringStreamReader r);
}
It can be used like this:
public void Parse(StringStreamReader r) {
double x = r.ReadDouble();
int y = r.ReadInt();
string z = r.ReadWord();
double[] arr = r.ReadDoubleArray();
MyParsableObject o = r.ReadObject<MyParsableObject>();
MyParsableObject [] oarr = r.ReadObjectArray<MyParsableObject>();
}
I did some benchmarking, comparing StringStreamReader with some other approaches already proposed (StreamReader.ReadLine and File.ReadAllLines). Here are the methods I used for benchmarking:
private static void Test_StringStreamReader(string filename) {
var sw = new Stopwatch();
sw.Start();
using (var sr = new StreamReader(new FileStream(filename, FileMode.Open, FileAccess.Read))) {
var r = new StringStreamReader(sr);
r.Separator = ' ';
while (!r.EOF) {
var dbls = new List<double>();
while (!r.EOL) {
dbls.Add(r.ReadDouble());
}
r.NextLine();
}
}
sw.Stop();
Console.WriteLine("elapsed: {0}", sw.Elapsed);
}
private static void Test_ReadLine(string filename) {
var sw = new Stopwatch();
sw.Start();
using (var sr = new StreamReader(new FileStream(filename, FileMode.Open, FileAccess.Read))) {
var dbls = new List<double>();
while (!sr.EndOfStream) {
string line = sr.ReadLine();
string[] bits = line.Split(' ');
foreach(string bit in bits) {
dbls.Add(double.Parse(bit));
}
}
}
sw.Stop();
Console.WriteLine("elapsed: {0}", sw.Elapsed);
}
private static void Test_ReadAllLines(string filename) {
var sw = new Stopwatch();
sw.Start();
string[] lines = System.IO.File.ReadAllLines(filename);
var dbls = new List<double>();
foreach(var line in lines) {
string[] bits = line.Split(' ');
foreach (string bit in bits) {
dbls.Add(double.Parse(bit));
}
}
sw.Stop();
Console.WriteLine("Test_ReadAllLines: {0}", sw.Elapsed);
}
I used a file with 1,000,000 lines of double values (3 values per line). The file was located on an SSD and each test was repeated multiple times in release mode. These are the results (on average):
Test_StringStreamReader: 00:00:01.1980975
Test_ReadLine: 00:00:00.9117553
Test_ReadAllLines: 00:00:01.1362452
So, as mentioned, StringStreamReader is a bit slower than the other approaches. For 10,000 lines, the performance is around (120ms / 95ms / 100ms).