C# task async await smart card - UI thread blocked - c#

I'm new to C#, and I'm trying to use task async await for a WinsForm GUI. I've read so many tutorials about it, but all of them implement tasks differently. Some tasks use functions, and others just put the code in to execute. Some use Task.Run() or just await. Furthermore, all the examples I've seen are of functions that are included in the UI class. I'm trying to run functions that are in classes that are within my UI. I'm just really confused now, and don't know what's right/wrong.
What I'm trying to do is write a file to an EEPROM, using the SpringCard API / PC/SC library. I parse the file into packets and write them to the smart card. I also want to update a status label and a progress bar. A lot of things can go wrong. I have flags set in the smart card, and right now I just have a while loop running until it reads a certain flag, which will obviously stall the program if the flag never arrives.
I guess I'm just confused about how to set it up. Help. I've tried using Tasks. Here is my code so far.
/* Initialize open file dialog */
// NOTE(review): this fragment appears to be the body of an async UI event
// handler (it uses await); the enclosing method and the catch/finally for the
// try below are not visible here.
OpenFileDialog ofd = new OpenFileDialog();
ofd.Multiselect = false;
ofd.Filter = "BIN Files (.bin)|*.bin|HEX Files (.hex)|*.hex";
ofd.InitialDirectory = "C:";
ofd.Title = "Select File";
//Check open file dialog result
if (ofd.ShowDialog() != DialogResult.OK)
{
// User cancelled: tear down the overlay (presumably a modal shade — confirm)
// and bail out.
if (shade != null)
{
shade.Dispose();
shade = null;
}
return;
}
//progform.Show();
// 'progress' is created but never passed to upgradeASYNC, so no progress is
// ever reported through it.
Progress<string> progress = new Progress<string>();
file = new ATAC_File(ofd.FileName);
try
{
cardchannel.DisconnectReset();
// NOTE(review): calling upgradeASYNC() STARTS it immediately — before the
// Connect() check below. Since upgradeASYNC contains no await, it runs to
// completion synchronously on the UI thread right here.
Task upgrade = upgradeASYNC();
if(cardchannel.Connect())
{
await upgrade;
}
else
{
add_log_text("Connection to the card failed");
MessageBox.Show("Failed to connect to the card in the reader : please check that you don't have another application running in background that tries to work with the smartcards in the same time");
if (shade != null)
{
shade.Dispose();
shade = null;
}
cardchannel = null;
}
}
/// <summary>
/// Parses the selected file into packets and writes it to the card's EEPROM,
/// logging the outcome. Returns a Task so the caller can await completion.
/// </summary>
private async Task upgradeASYNC()
{
    add_log_text("Parsing file into packets.");
    // parseFile() and WriteFileASYNC() are blocking (file I/O plus busy-wait
    // polling of the card). An async method with no await runs entirely on
    // the calling thread (compiler warning CS1998), which is why the UI froze.
    // Offload the whole job to a thread-pool thread via Task.Run.
    bool writefile = await Task.Run(() =>
    {
        int totalpackets = file.parseFile();
        // TODO(review): report totalpackets through an IProgress<T> passed in
        // by the caller if progress UI is wanted.
        ST_EEPROMM24LR64ER chip = new ST_EEPROMM24LR64ER(this, cardchannel, file, EEPROM.DONOTHING);
        return chip.WriteFileASYNC();
    });
    // Execution resumes on the UI thread here, so touching controls is safe.
    if (writefile)
    {
        add_log_text("WRITE FILE OK.");
    }
    else
    {
        add_log_text("WRITE FILE BAD.");
    }
}
In the file class:
/// <summary>
/// Splits the file into fixed-size packets: a 4-byte header, MAXDATASIZE
/// payload bytes (the final packet is 0xFF-padded), and a 4-byte CRC32.
/// Packets are appended to PacketBYTE; returns the total packet count.
/// </summary>
public int parseFile()
{
    int readbytecount = 0;
    int packetcount = 0;
    int numofbytesleft = 0;
    byte[] hash = new byte[4];
    byte[] packetinfo = new byte[4];
    byte[] filechunk = null;
    // using guarantees the stream/reader are closed even on exceptions
    // (the original leaked both; the unused FileInfo was dropped).
    using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
    using (BinaryReader br = new BinaryReader(fs))
    {
        /* Read file until all file bytes read */
        while (size_int > readbytecount)
        {
            // Fresh payload buffer for this packet.
            filechunk = new byte[MAXDATASIZE];
            if (packetcount < numoffullpackets)
            {
                // Header: (size_int + 1) then 1-based packet number.
                // NOTE(review): this branch writes [0]=lo,[1]=hi while the
                // final-packet branch below writes [0]=hi,[1]=lo — one of the
                // two orders must be wrong. Confirm against the card protocol
                // before changing either.
                packetinfo[0] = (byte)((size_int + 1) % 0x0100); //packetcountlo
                packetinfo[1] = (byte)((size_int + 1) / 0x0100); //packetcounthi
                packetinfo[2] = (byte)((packetcount + 1) / 0x0100); //packetcounthi
                packetinfo[3] = (byte)((packetcount + 1) % 0x0100); //packetcountlo
                //read bytes from file into packet array
                bytesread = br.Read(filechunk, 0, MAXDATASIZE);
                readbytecount += bytesread;
            }
            else if (packetcount == numoffullpackets)
            {
                // Final, partial packet: read the remaining bytes...
                numofbytesleft = size_int - (MAXDATASIZE * numoffullpackets);
                packetinfo[0] = (byte)((size_int + 1) / 0x0100); //packetcounthi
                packetinfo[1] = (byte)((size_int + 1) % 0x0100); //packetcountlo
                packetinfo[2] = (byte)((packetcount + 1) / 0x0100); //packetcounthi
                packetinfo[3] = (byte)((packetcount + 1) % 0x0100); //packetcountlo
                for (int j = 0; j < numofbytesleft; j++)
                {
                    filechunk[j] = br.ReadByte();
                    readbytecount++;
                }
                // ...and pad the rest of the buffer with 0xFF.
                for (int j = numofbytesleft; j < MAXDATASIZE; j++)
                {
                    filechunk[j] = 0xFF;
                }
            }
            else
            {
                // Should be unreachable: packetcount can never exceed
                // numoffullpackets once the loop condition is correct.
                MessageBox.Show("ERROR");
            }
            // CRC32 over the (padded) payload.
            int i = 0;
            foreach (byte b in crc32.ComputeHash(filechunk))
            {
                hash[i++] = b;
            }
            // chunk = header + payload + CRC.
            byte[] chunk = new byte[MAXWRITESIZE];
            Buffer.BlockCopy(packetinfo, 0, chunk, 0, packetinfo.Length);
            Buffer.BlockCopy(filechunk, 0, chunk, packetinfo.Length, filechunk.Length);
            Buffer.BlockCopy(hash, 0, chunk, (packetinfo.Length + filechunk.Length), hash.Length);
            packetcount++;
            PacketBYTE.Add(chunk);
        }
    }
    parseCMD();
    return PacketBYTE.Count;
}
In the EEPROM class:
/// <summary>
/// Writes the parsed file to the card over RF: handshakes the "upload app"
/// function flag, streams each EEPROM command block, then waits for the card
/// to acknowledge each packet. Returns true when all commands were written.
/// NOTE(review): despite the name this method is fully synchronous — every
/// loop below busy-waits and blocks the calling thread. TODO: make it truly
/// async or always call it from a background thread.
/// </summary>
public bool WriteFileASYNC()
{
    int blocknum = ATAC_CONSTANTS.RFBN_RFstartwrite;
    byte[] response = null;
    CAPDU[] EEPROMcmd = null;
    int packetCount = 0;
    log("ATTEMPT: Read response funct flag.");
    // Handshake: cycle the RF field and rewrite the function flag until the
    // card echoes UPLOADAPP in its response flag. Unbounded — stalls forever
    // if the card never answers (add a retry limit/timeout).
    do
    {
        StopRF();
        Thread.SpinWait(100);
        StartRF();
        log("ATTEMPT: Write function flag.");
        while (!WriteFlag(ATAC_CONSTANTS.RFBN_functflag, EEPROM.UPLOADAPP)) ;
    } while (ReadFunctFlag(ATAC_CONSTANTS.RFBN_responseflag, 0) != EEPROM.UPLOADAPP);
    for (int EEPROMcount = 0; EEPROMcount < file.CmdBYTE.Count; EEPROMcount++)
    {
        string temp = "ATTEMPT: Write EEPROM #" + EEPROMcount.ToString();
        log(temp);
        EEPROMcmd = file.CmdBYTE[EEPROMcount];
        while (EEPROMcmd[blocknum] != null)
        {
            if (blocknum % 32 == 0)
            {
                // Fix: the original built this message and then logged the
                // constant "ATTEMPT: Write packet #", dropping the number.
                string tempp = "ATTEMPT: Write packet #" + packetCount.ToString();
                log(tempp);
                packetCount++;
            }
            // Retry each block write until the card answers.
            do
            {
                response = WriteBinaryASYNC(EEPROMcmd[blocknum]);
            } while (response == null);
            blocknum++;
        }
        log("ATTEMPT: Write packet flag.");
        while (!WriteFlag(ATAC_CONSTANTS.RFBN_packetflag, ATAC_CONSTANTS.RFflag)) ;
        log("ATTEMPT: Write packet flag.");
        // Cycle RF and poll until the card confirms this packet batch.
        do
        {
            StopRF();
            Thread.SpinWait(300);
            StartRF();
        } while (!ReadFlag(ATAC_CONSTANTS.RFBN_packetresponseflag, ((blocknum/32) - 1)*(EEPROMcount+1)));
        blocknum = ATAC_CONSTANTS.RFBN_RFstartwrite;
    }
    return true;
}

Tasks are not threads.
When you write this:
Task upgrade = upgradeASYNC();
you are invoking upgradeASYNC on the current (UI) thread. An async method runs synchronously until its first await — and since upgradeASYNC contains no await at all, it runs to completion right there on the UI thread; no new thread is created.
When you write this:
await upgrade;
you are asynchronously waiting for that task to finish (before going to the next instruction) — but by then it has already completed.
And this method
private async Task upgradeASYNC()
returns a Task object only because you add the async keyword. But in the body of this method there is no await. So it just runs synchronously, like any thread job.
I don't have time to rewrite your code, so I'll leave that to another Stack Overflow user. Keep studying async/await — it pays off. ;)

Related

Sync current progress bar with overall when read a files

So, i try to synchronize two progress bar like that:
long TotalSize; //Total size of files in Directory "Source"
foreach (string file in Directory.GetFiles("Source", "*.*", SearchOption.AllDirectories))
{
long FileSize; //Current file size in recursive search
long CurrentFileSize //It's mean `+=` add size of current file in recursive search
using (FileStream readfile = new FileStream(file, FileMode.Open, FileAccess.Read))
{
byte[] buffer = new byte[1024 * 1024];
int Readbyte;
long TotalByteSize = 0;
while ((readbyte = readfile.Read(buffer, 0, buffer.Length)) > 0)
{
TotalByteSize += Readbyte;
CurrentFileProgress.Value = (int)((float)TotalByteSize / FileSize * 100.0f); //Progress when reading current file bytes
}
}
OverallFilesProgress.Value = (int)((float)CurrentFileSize/ TotalSize * 100.0f); //Overall progress when read all files
}
This works fine, but it's not a synchronization, it - paralleled the progress bar. Can u show me example how to really synchronize two progress bar (current and overall). In my head, I understand that I need to somehow save the read bytes of the current file into a variable, and then add this to the overall progress, but in practice I can’t do it. Second problem is - overall progress bar is start later 1 sec (bcuz wait while loop). P.S. Only filestream, no BGW.
It looks like you have some issues with your counters, it should probably look something like:
int overallTotal = ...;
int overallCompleted = 0;
foreach(var file in files){
// open file etc
int currentCompleted = 0;
int currentTotal = file.Size;
while (...){
overallCompleted += noReadBytes;
currentCompleted += noReadBytes;
CurrentFileProgress.Value = (int)(currentCompleted * 100.0f/ currentTotal );
OverallFilesProgress.Value = (int)(overallCompleted * 100.0f/ overallTotal );
}
}
That should ensure both progress bars are updated continuously. Note that you should probably do the reading on a background thread and use Progress<T>.Report to report progress. Or use asynchronous reads to avoid blocking the UI thread.
One of many ways is with a for-next loop on the count of the Directory.GetAllFiles() result and then use the FileStream.ReadAsync() method to read (for example) 1 MB chunks without blocking the UI. The ProgressBar updates will be on the UI thread in this way, as they should be.
const int ONE_MB = 0x100000;
/// <summary>
/// Recursively enumerates files under <paramref name="selectedPath"/> and
/// reads each one in ~1 MB chunks with ReadAsync, updating the per-file and
/// overall progress bars on the UI thread between awaits.
/// </summary>
private async Task loadFilesAsync(string selectedPath)
{
    string[] files = Directory.GetFiles(selectedPath, "*.*", SearchOption.AllDirectories);
    for (int count = 0; count < files.Length; count++)
    {
        progressBarOverall.Value = (int)((count / (float)files.Length) * 100);
        string path = files[count];
        using (FileStream fileStream = new FileStream(path, FileMode.Open))
        {
            long length = fileStream.Length;
            long offset = 0;
            // Fixed-size scratch buffer. The original allocated a buffer the
            // size of the WHOLE file, which defeats the point of chunked
            // reads and can exhaust memory on large files.
            byte[] buffer = new byte[ONE_MB];
            while (offset < length)
            {
                int read = await fileStream.ReadAsync(buffer, 0, buffer.Length);
                if (read == 0)
                {
                    // File shrank while reading; the original would have
                    // spun forever on 'offset != length'.
                    break;
                }
                offset += read;
                progressBarSingle.Value = (int)((offset / (float)length) * 100);
            }
        }
    }
    tableLayoutPanelStatus.Visible = false;
}
Testing
public partial class MainForm : Form
{
public MainForm()
{
InitializeComponent();
tsmiBrowseFolder.Click += onClickBrowseFolder;
}
private readonly FolderBrowserDialog _folderBrowser = new FolderBrowserDialog
{
RootFolder= Environment.SpecialFolder.ApplicationData,
};
private void onClickBrowseFolder(object sender, EventArgs e)
{
if(_folderBrowser.ShowDialog() == DialogResult.OK)
{
_ = loadFilesAsync(_folderBrowser.SelectedPath);
}
}
.
.
.
}

Intersect and Union in byte array of 2 files

I have 2 files.
1 is Source File and 2nd is Destination file.
Below is my code for Intersect and Union two file using byte array.
FileStream frsrc = new FileStream("Src.bin", FileMode.Open);
FileStream frdes = new FileStream("Des.bin", FileMode.Open);
int length = 24; // get file length
byte[] src = new byte[length];
byte[] des = new byte[length]; // create buffer
int Counter = 0; // actual number of bytes read
int subcount = 0;
while (frsrc.Read(src, 0, length) > 0)
{
try
{
Counter = 0;
frdes.Position = subcount * length;
while (frdes.Read(des, 0, length) > 0)
{
var data = src.Intersect(des);
var data1 = src.Union(des);
Counter++;
}
subcount++;
Console.WriteLine(subcount.ToString());
}
}
catch (Exception ex)
{
}
}
It works fine and is very fast.
But now the problem is that I want the count, and when I use the code below it becomes very slow.
var data = src.Intersect(des).Count();
var data1 = src.Union(des).Count();
So, Is there any solution for that ?
If yes, then please let me know as soon as possible.
Thanks
Intersect and Union are not the fastest operations. The reason you see it being fast is that you never actually enumerate the results!
Both return an enumerable, not the actual results of the operation. You're supposed to go through that and enumerate the enumerable, otherwise nothing happens - this is called "deferred execution". Now, when you do Count, you actually enumerate the enumerable, and incur the full cost of the Intersect and Union - believe me, the Count itself is relatively trivial (though still an O(n) operation!).
You'll need to make your own methods, most likely. You want to avoid the enumerable overhead, and more importantly, you'll probably want a lookup table.
A few points: the comment // get file length is misleading as it is the buffer size. Counter is not the number of bytes read, it is the number of blocks read. data and data1 will end up with the result of the last block read, ignoring any data before them. That is assuming that nothing goes wrong in the while loop - you need to remove the try structure to see if there are any errors.
What you can do is count the number of occurences of each byte in each file, then if the count of a byte in any file is greater than one then it is is a member of the intersection of the files, and if the count of a byte in all the files is greater than one then it is a member of the union of the files.
It is just as easy to write the code for more than two files as it is for two files, whereas LINQ is easy for two but a little bit more fiddly for more than two. (I put in a comparison with using LINQ in a naïve fashion for only two files at the end.)
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace ConsoleApplication1
{
class Program
{
    /// <summary>
    /// Computes the union/intersection of the byte VALUES of two files by
    /// tallying per-byte-value counts (one 256-slot table per file), then
    /// compares the timing against the naive LINQ Intersect/Union approach.
    /// </summary>
    static void Main(string[] args)
    {
        // '@' verbatim-string prefixes restored: the posted code used '#',
        // a Stack Overflow rendering artifact that does not compile.
        var file1 = @"C:\Program Files (x86)\Electronic Arts\Crysis 3\Bin32\Crysis3.exe"; // 26MB
        var file2 = @"C:\Program Files (x86)\Electronic Arts\Crysis 3\Bin32\d3dcompiler_46.dll"; // 3MB
        List<string> files = new List<string> { file1, file2 };
        var sw = System.Diagnostics.Stopwatch.StartNew();
        // Prepare one counter table per file (one slot per possible byte value).
        var nFiles = files.Count;
        int[][] count = new int[nFiles][];
        for (int i = 0; i < nFiles; i++)
        {
            count[i] = new int[256];
        }
        // Get the counts of bytes in each file.
        int bufLen = 32768;
        byte[] buffer = new byte[bufLen];
        int bytesRead;
        for (int fileNum = 0; fileNum < nFiles; fileNum++)
        {
            using (var sr = new FileStream(files[fileNum], FileMode.Open, FileAccess.Read))
            {
                bytesRead = bufLen;
                while (bytesRead > 0)
                {
                    bytesRead = sr.Read(buffer, 0, bufLen);
                    for (int i = 0; i < bytesRead; i++)
                    {
                        count[fileNum][buffer[i]]++;
                    }
                }
            }
        }
        // A byte value is in the union if ANY file contains it, and in the
        // intersection if EVERY file contains it.
        var inAny = new List<byte>(); // union
        var inAll = new List<byte>(); // intersect
        for (int i = 0; i < 256; i++)
        {
            bool all = true;
            for (int fileNum = 0; fileNum < nFiles; fileNum++)
            {
                if (count[fileNum][i] > 0)
                {
                    if (!inAny.Contains((byte)i)) // avoid adding same value more than once
                    {
                        inAny.Add((byte)i);
                    }
                }
                else
                {
                    all = false;
                }
            }
            if (all)
            {
                inAll.Add((byte)i);
            }
        }
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        // Display the results
        Console.WriteLine("Union: " + string.Join(",", inAny.Select(x => x.ToString("X2"))));
        Console.WriteLine();
        Console.WriteLine("Intersect: " + string.Join(",", inAll.Select(x => x.ToString("X2"))));
        Console.WriteLine();
        // Compare to using LINQ.
        // N/B. Will need adjustments for more than two files.
        var srcBytes1 = File.ReadAllBytes(file1);
        var srcBytes2 = File.ReadAllBytes(file2);
        sw.Restart();
        var intersect = srcBytes1.Intersect(srcBytes2).ToArray().OrderBy(x => x);
        var union = srcBytes1.Union(srcBytes2).ToArray().OrderBy(x => x);
        Console.WriteLine(sw.ElapsedMilliseconds);
        Console.WriteLine("Union: " + String.Join(",", union.Select(x => x.ToString("X2"))));
        Console.WriteLine();
        Console.WriteLine("Intersect: " + String.Join(",", intersect.Select(x => x.ToString("X2"))));
        Console.ReadLine();
    }
}
}
The counting-the-byte-occurences method is roughly five times faster than the LINQ method on my computer, even without the latter loading the files and on a range of file sizes (a few KB to a few MB).

getting mulitple images from a single stream piped from ffmpeg stdout

I start a process to retrieve a few frames from a video file with ffmpeg,
ffmpeg -i "<videofile>.mp4" -frames:v 10 -f image2pipe pipe:1
and pipe the images to stdout -
var cmd = Process.Start(p);
var stream = cmd.StandardOutput.BaseStream;
var img = Image.FromStream(stream);
Getting the first image this way works, but how do I get all of them?
OK, this was gobsmackingly easy — I'm kind of embarrassed I asked here. I'll post the answer in case it helps anyone else.
The first few bytes in the stream will be repeated every time there is a new image. I guessed the first 8 would do and voila.
/// <summary>
/// Lazily yields every image found in a concatenated image stream (e.g. the
/// image2pipe output of ffmpeg). The first 8 bytes of the stream are taken as
/// the per-image start signature and each match delimits one image.
/// </summary>
static IEnumerable<Image> GetThumbnails(Stream stream)
{
    byte[] data;
    using (var buffered = new MemoryStream())
    {
        stream.CopyTo(buffered);
        data = buffered.ToArray();
    }
    var signature = data.Take(8).ToArray(); //??
    int previous = -1;
    foreach (var position in GetBytePatternPositions(data, signature))
    {
        if (previous > -1)
            yield return GetImageAt(data, previous, position);
        previous = position;
    }
    // Emit the final image, which runs to the end of the buffer.
    if (previous > -1)
        yield return GetImageAt(data, previous, data.Length);
}
/// <summary>
/// Decodes the bytes in [start, end) as one Image.
/// </summary>
static Image GetImageAt(byte[] data, int start, int end)
{
    // Image.FromStream requires the stream to remain open for the lifetime
    // of the Image; the original disposed the MemoryStream immediately,
    // which can cause GDI+ errors when the image is later drawn or saved.
    // A MemoryStream over a byte[] holds no unmanaged resources, so leaving
    // it undisposed is safe.
    var ms = new MemoryStream(data, start, end - start, false);
    return Image.FromStream(ms);
}
/// <summary>
/// Yields the start index of every occurrence of <paramref name="pattern"/>
/// in <paramref name="data"/> (scanning resumes after the last matched byte).
/// </summary>
static IEnumerable<int> GetBytePatternPositions(byte[] data, byte[] pattern)
{
    var dataLen = data.Length;
    var patternLen = pattern.Length - 1; // index of the pattern's last byte
    int scanData = 0;
    int scanPattern = 0;
    while (scanData < dataLen)
    {
        if (pattern[0] == data[scanData])
        {
            if (patternLen == 0)
            {
                // Degenerate single-byte pattern: a first-byte hit is a full
                // match (the original indexed pattern[1] here and threw).
                yield return scanData;
            }
            else
            {
                scanPattern = 1;
                scanData++;
                // 'scanData < dataLen' guard: the original read data[dataLen]
                // when a partial match reached the end of the input.
                while (scanData < dataLen && pattern[scanPattern] == data[scanData])
                {
                    if (scanPattern == patternLen)
                    {
                        yield return scanData - patternLen;
                        break;
                    }
                    scanPattern++;
                    scanData++;
                }
            }
        }
        scanData++;
    }
}

how to get file parallel using HttpWebRequest

I'm trying to make a program like IDM, that can download parts of the file simultaneously.
The tool i'm using to achieve this is TPL in C# .Net4.5
But I'm having a problem when using Tasks to make the operation parallel.
The sequence function is functioning well and it is downloading the files correctly.
The parallel function using Tasks is working until something weird happens:
I've created 4 tasks, with Factory.StartNew(), in each task the start position and the end position are given, the task will download these files, then it'll return it in byte[], and everything is going well, the tasks are working fine, but at some point, the executing freezes and that's it, the program stops and nothing else happens.
the implementation of the parallel function:
/// <summary>
/// Downloads a file in 4 ranged pieces on the thread pool, blocks for the
/// results, and stitches them together into a local file.
/// </summary>
static void DownloadPartsParallel()
{
    string uriPath = "http://mschnlnine.vo.llnwd.net/d1/pdc08/PPTX/BB01.pptx";
    Uri uri = new Uri(uriPath);
    long l = GetFileSize(uri);
    Console.WriteLine("Size={0}", l);
    int granularity = 4;
    byte[][] arr = new byte[granularity][];
    Task<byte[]>[] tasks = new Task<byte[]>[granularity];
    long part = l / granularity; // hoisted from the repeated l / granularity
    tasks[0] = Task<byte[]>.Factory.StartNew(() => DownloadPartOfFile(uri, 0, part));
    tasks[1] = Task<byte[]>.Factory.StartNew(() => DownloadPartOfFile(uri, part + 1, 2 * part));
    tasks[2] = Task<byte[]>.Factory.StartNew(() => DownloadPartOfFile(uri, 2 * part + 1, 3 * part));
    tasks[3] = Task<byte[]>.Factory.StartNew(() => DownloadPartOfFile(uri, 3 * part + 1, l));
    // Task<T>.Result blocks until each piece finishes downloading.
    arr[0] = tasks[0].Result;
    arr[1] = tasks[1].Result;
    arr[2] = tasks[2].Result;
    arr[3] = tasks[3].Result;
    // using guarantees the output stream is flushed and closed (the original
    // never disposed it), and Write replaces the per-byte WriteByte loop.
    using (Stream localStream = File.Create("E:\\a\\" + Path.GetFileName(uri.LocalPath)))
    {
        for (int i = 0; i < granularity; i++)
        {
            // The last piece's buffer is one byte longer than the data the
            // server can return (its range ends at offset l, one past the
            // final zero-based offset l-1), so the original dropped its
            // trailing, never-filled byte. That behavior is kept here.
            int len = (i == granularity - 1) ? arr[i].Length - 1 : arr[i].Length;
            localStream.Write(arr[i], 0, len);
        }
    }
}
the DownloadPartOfFile function implementation:
/// <summary>
/// Downloads the inclusive byte range [from, to] of fileUrl into a new array.
/// NOTE(review): the array is sized (to - from) + 1; if the server returns
/// fewer bytes (e.g. 'to' is past end-of-file) the tail stays zero-filled.
/// Any exception is swallowed and a partial/empty array is returned.
/// </summary>
public static byte[] DownloadPartOfFile(Uri fileUrl, long from, long to)
{
int bytesProcessed = 0;
BinaryReader reader = null;
WebResponse response = null;
byte[] bytes = new byte[(to - from) + 1];
try
{
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(fileUrl);
// AddRange uses inclusive bounds per the HTTP Range header.
request.AddRange(from, to);
// NOTE(review): int.MaxValue timeouts mean a stalled connection hangs this
// thread forever — this is the "freeze" described in the question. Use a
// finite timeout and handle the timeout exception instead.
request.ReadWriteTimeout = int.MaxValue;
request.Timeout = int.MaxValue;
// Always true — WebRequest.Create throws rather than returning null.
if (request != null)
{
response = request.GetResponse();
if (response != null)
{
reader = new BinaryReader(response.GetResponseStream());
int bytesRead;
do
{
// Read up to 1 KB at a time and append it to the result buffer.
byte[] buffer = new byte[1024];
bytesRead = reader.Read(buffer, 0, buffer.Length);
if (bytesRead == 0)
{
break;
}
// Resize+CopyTo per chunk is O(n) each time; a single fixed offset
// write into 'bytes' would avoid the extra allocation.
Array.Resize<byte>(ref buffer, bytesRead);
buffer.CopyTo(bytes, bytesProcessed);
bytesProcessed += bytesRead;
Console.WriteLine(Thread.CurrentThread.ManagedThreadId + ",Downloading" + bytesProcessed);
} while (bytesRead > 0);
}
}
}
catch (Exception e)
{
// Swallows all errors; caller cannot distinguish success from failure.
Console.WriteLine(e.Message);
}
finally
{
if (response != null) response.Close();
if (reader != null) reader.Close();
}
return bytes;
}
I tried to solve it by setting int.MaxValue to the reading timeout, writing reading timeout, and timeout, that's why the program freezes, if i didn't do that, an exception of timeout will occur while in function DownloadPartsParallel
so is there a solution, or any other advice that may help, thanks.
I would use HttpClient.SendAsync rather than WebRequest (see "HttpClient is Here!").
I would not use any extra threads. The HttpClient.SendAsync API is naturally asynchronous and returns an awaitable Task<>, there is no need to offload it to a pool thread with Task.Run/Task.TaskFactory.StartNew (see this for a detailed discussion).
I would also limit the number of parallel downloads with SemaphoreSlim.WaitAsync(). Below is my take as a console app (not extensively tested):
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
namespace Console_21737681
{
// Console demo: download a URL in fixed-size ranged chunks with HttpClient,
// throttled to MAX_PARALLEL concurrent requests by a SemaphoreSlim.
class Program
{
const int MAX_PARALLEL = 4; // max parallel downloads
const int CHUNK_SIZE = 2048; // size of a single chunk
// a chunk of downloaded data
class Chunk
{
public long Start { get; set; }
public int Length { get; set; }
public byte[] Data { get; set; }
};
// throttle downloads
SemaphoreSlim _throttleSemaphore = new SemaphoreSlim(MAX_PARALLEL);
// get a chunk
// Downloads [start, start + length - 1] of url; the semaphore caps how many
// requests are in flight at once.
async Task<Chunk> GetChunk(HttpClient client, long start, int length, string url)
{
await _throttleSemaphore.WaitAsync();
try
{
using (var request = new HttpRequestMessage(HttpMethod.Get, url))
{
// Range header bounds are inclusive.
request.Headers.Range = new System.Net.Http.Headers.RangeHeaderValue(start, start + length - 1);
using (var response = await client.SendAsync(request))
{
var data = await response.Content.ReadAsByteArrayAsync();
// NOTE(review): Data is deliberately left unset here (the assignment is
// commented out), so callers receive chunk metadata without the bytes.
return new Chunk { Start = start, Length = length/*, Data = data*/ };
}
}
}
finally
{
// Release in finally so a failed request never leaks a semaphore slot.
_throttleSemaphore.Release();
}
}
// download the URL in parallel by chunks
// Issues a HEAD request for Content-Length, splits it into CHUNK_SIZE pieces
// and awaits them all.
async Task<Chunk[]> DownloadAsync(string url)
{
using (var client = new HttpClient())
{
var request = new HttpRequestMessage(HttpMethod.Head, url);
var response = await client.SendAsync(request);
var contentLength = response.Content.Headers.ContentLength;
if (!contentLength.HasValue)
throw new InvalidOperationException("ContentLength");
// Ceiling division: last chunk may be shorter than CHUNK_SIZE.
var numOfChunks = (int)((contentLength.Value + CHUNK_SIZE - 1) / CHUNK_SIZE);
var tasks = Enumerable.Range(0, numOfChunks).Select(i =>
{
// start a new chunk
long start = i * CHUNK_SIZE;
var length = (int)Math.Min(CHUNK_SIZE, contentLength.Value - start);
return GetChunk(client, start, length, url);
}).ToList();
await Task.WhenAll(tasks);
// the order of chunks is random
return tasks.Select(task => task.Result).ToArray();
}
}
static void Main(string[] args)
{
var program = new Program();
// Blocking on .Result is acceptable in a console Main (no sync context);
// it would deadlock in a UI app.
var chunks = program.DownloadAsync("http://flaglane.com/download/australian-flag/australian-flag-large.png").Result;
Console.WriteLine("Chunks: " + chunks.Count());
Console.ReadLine();
}
}
}
OK, here's how I would do what you're attempting. This is basically the same idea, just implemented differently.
/// <summary>
/// Smoke test: download w3.org in 4 ranged pieces and save the result.
/// </summary>
public static void DownloadFileInPiecesAndSave()
{
    //test
    var uri = new Uri("http://www.w3.org/");
    var bytes = DownloadInPieces(uri, 4);
    // '@' verbatim-string prefix restored; the posted code had '#', a
    // Stack Overflow markdown artifact that does not compile.
    File.WriteAllBytes(@"c:\temp\RangeDownloadSample.html", bytes);
}
/// <summary>
/// Donwload a file via HTTP in multiple pieces using a Range request.
/// </summary>
public static byte[] DownloadInPieces(Uri uri, uint numberOfPieces)
{
    //I'm just fudging this for expository purposes. In reality you would probably want to do a HEAD request to get total file size.
    ulong totalFileSize = 1003;
    var pieceSize = totalFileSize / numberOfPieces;
    List<Task<byte[]>> tasks = new List<Task<byte[]>>();
    for (uint i = 0; i < numberOfPieces; i++)
    {
        var start = i * pieceSize;
        // HTTP Range bounds are inclusive at both ends, so each piece must
        // stop one byte before the next piece's start. The original used
        // 'end = start + pieceSize', making adjacent pieces overlap by one
        // byte (duplicated data in the output). The final piece absorbs the
        // remainder and ends at the last zero-based offset.
        var end = (i == numberOfPieces - 1)
            ? totalFileSize - 1
            : start + pieceSize - 1;
        tasks.Add(DownloadFilePiece(uri, start, end));
    }
    Task.WaitAll(tasks.ToArray());
    //This is probably not the single most efficient way to combine byte arrays, but it is succinct...
    return tasks.SelectMany(t => t.Result).ToArray();
}
/// <summary>
/// Downloads the inclusive byte range [rangeStart, rangeEnd] of uri.
/// Returns null on any failure — NOTE(review): the caller's
/// SelectMany(t => t.Result) will throw on a null result, so failures
/// surface as a NullReferenceException rather than a handled error.
/// </summary>
private static async Task<byte[]> DownloadFilePiece(Uri uri, ulong rangeStart, ulong rangeEnd)
{
try
{
var request = (HttpWebRequest)WebRequest.Create(uri);
request.AddRange((long)rangeStart, (long)rangeEnd);
// NOTE(review): WebProxy.GetDefaultProxy() is obsolete; prefer
// WebRequest.DefaultWebProxy (or leave the default in place).
request.Proxy = WebProxy.GetDefaultProxy();
using (var response = await request.GetResponseAsync())
using (var responseStream = response.GetResponseStream())
using (var memoryStream = new MemoryStream((int)(rangeEnd - rangeStart)))
{
await responseStream.CopyToAsync(memoryStream);
return memoryStream.ToArray();
}
}
catch (WebException wex)
{
//Do lots of error handling here, lots of things can go wrong
//In particular watch for 416 Requested Range Not Satisfiable
return null;
}
catch (Exception ex)
{
//handle the unexpected here...
return null;
}
}
Note that I glossed over a lot of stuff here, such as:
Detecting if the server supports range requests. If it doesn't then the server will return the entire content in each request, and we'll get several copies of it.
Handling any sort of HTTP errors. What if the third request fails?
Retry logic
Timeouts
Figuring out how big the file actually is
Checking whether the file is big enough to warrant multiple requests, and if so how many? It's probably not worth doing this in parallel for files under 1 or 2 MB, but you'd have to test
Most likely a bunch of other stuff.
So you've got a long way to go before I would use this in production. But it should give you an idea of where to start.

Most efficient way for reading IMDB movies list

I am reading IMDB movies listing from a text file on my harddrive (originally available from IMDB site at ftp://ftp.fu-berlin.de/pub/misc/movies/database/movies.list.gz).
It takes around 5 minutes on my machine (basic info: Win7 x64bit, 16GB RAM, 500 GB SATA Hardisk 7200 RPM) to read this file line by line using code below.
I have two questions:
Is there any way I can optimize code to improve the read time?
Data access don't need to be sequential as I won't mind reading data from top to bottom / bottom to top or any order for that matter as long as it read one line at a time. I am wondering is there a way to read in multiple directions to improve the read time?
The application is a Windows Console Application.
Update: Many responses correctly pointed out that Writing to the Console takes substantial time. Considering that the displaying of data on the Windows Console is now desirable but not mandatory.
//Code Block
string file = #"D:\movies.list";
FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
using (StreamReader sr = new StreamReader(fs))
{
while (sr.Peek() >= 0)
{
Console.WriteLine(sr.ReadLine());
}
}
I'm not certain whether this is more efficient or not, but an alternate method would be to use File.ReadAllLines:
var movieFile = File.ReadAllLines(file);
foreach (var movie in movieFile)
Console.WriteLine(movie);
I am not a c# developer, but how about doing a bulk insert into database using the file(which will be one time). Then you can reuse the data and export as well.
In .net 4 you can use File.ReadLines for lazy evaluation and thus lower RAM usage when working on large files.
You can do linq operation directly on files and this along with File.ReadLines would improve load time.
For better understanding you can check, Read text file word-by-word using LINQ
You can also do comparison as well but putting time intervals.
However if you making web app you can read whole file on application start event and cache them in application pool for better performanace.
First of all, if you don't care about printing out the list to console, please edit your question.
Second, I created a timing program to test the speeds of the different methods suggested:
class Program
{
private static readonly string file = #"movies.list";
private static readonly int testStart = 1;
private static readonly int numOfTests = 2;
private static readonly int MinTimingVal = 1000;
private static string[] testNames = new string[] {
"Naive",
"OneCallToWrite",
"SomeCallsToWrite",
"InParallel",
"InParallelBlcoks",
"IceManMinds",
"TestTiming"
};
private static double[] avgSecs = new double[numOfTests];
private static int[] testIterations = new int[numOfTests];
// Entry point: registers the candidate implementations, times each one with
// DoTiming, and prints a summary.
// NOTE(review): numOfTests is declared as 2 above, so actionArray has length
// 2 — the assignments to actionArray[2]..[6] below throw
// IndexOutOfRangeException. numOfTests presumably should be
// testNames.Length (7); confirm and fix the field initializer.
public static void Main(string[] args)
{
Console.WriteLine("Starting tests...");
Debug.WriteLine("Starting tests...");
Console.WriteLine("");
Debug.WriteLine("");
//*****************************
//The console is the bottle-neck, so we can
//speed-up redrawing it by only showing 1 line at a time.
Console.WindowHeight = 1;
Console.WindowWidth = 50;
Console.BufferHeight = 100;
Console.BufferWidth = 50;
//******************************
Action[] actionArray = new Action[numOfTests];
actionArray[0] = naive;
actionArray[1] = oneCallToWrite;
actionArray[2] = someCallsToWrite;
actionArray[3] = inParallel;
actionArray[4] = inParallelBlcoks;
actionArray[5] = iceManMinds;
actionArray[6] = testTiming;
for (int i = testStart; i < actionArray.Length; i++)
{
Action a = actionArray[i];
DoTiming(a, i);
}
printResults();
Console.WriteLine("");
Debug.WriteLine("");
Console.WriteLine("Tests complete.");
Debug.WriteLine("Tests complete.");
Console.WriteLine("Press Enter to Close Console...");
Debug.WriteLine("Press Enter to Close Console...");
Console.ReadLine();
}
// Times action 'a': one untimed warm-up call, then doubles the iteration
// count until a run takes at least MinTimingVal ms, and stores the average
// seconds-per-iteration in avgSecs[num].
// NOTE(review): 'loopWatch' is never used and can be removed.
private static void DoTiming(Action a, int num)
{
a.Invoke();
Stopwatch watch = new Stopwatch();
Stopwatch loopWatch = new Stopwatch();
bool shouldRetry = false;
int numOfIterations = 2;
do
{
watch.Start();
for (int i = 0; i < numOfIterations; i++)
{
a.Invoke();
}
watch.Stop();
shouldRetry = false;
if (watch.ElapsedMilliseconds < MinTimingVal) //if the time was less than the minimum, increase load and re-time.
{
shouldRetry = true;
numOfIterations *= 2;
watch.Reset();
}
} while (shouldRetry);
long totalTime = watch.ElapsedMilliseconds;
double avgTime = ((double)totalTime) / (double)numOfIterations;
avgSecs[num] = avgTime / 1000.00;
testIterations[num] = numOfIterations;
}
// Prints the average elapsed time per test to both the console and the
// debugger output.
// NOTE(review): the {0:N4} format is applied to t.ToString() — a string —
// so the numeric format specifier is ignored; format the TimeSpan directly
// if 4-decimal output is wanted.
private static void printResults()
{
Console.WriteLine("");
Debug.WriteLine("");
for (int i = testStart; i < numOfTests; i++)
{
TimeSpan t = TimeSpan.FromSeconds(avgSecs[i]);
Console.WriteLine("ElapsedTime: {0:N4}, " + "test: " + testNames[i], t.ToString() );
Debug.WriteLine("ElapsedTime: {0:N4}, " + "test: " + testNames[i], t.ToString() );
}
}
// Baseline candidate: stream the file and echo each line straight to the
// console, one WriteLine call per line.
public static void naive()
{
    var stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
    using (var reader = new StreamReader(stream))
    {
        while (reader.Peek() >= 0)
        {
            Console.WriteLine(reader.ReadLine());
        }
    }
}
// Candidate: buffer the entire file into one StringBuilder (each line
// prefixed with '\n') and emit it with a single Console.Write call.
public static void oneCallToWrite()
{
    var stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
    using (var reader = new StreamReader(stream))
    {
        var buffered = new StringBuilder();
        while (reader.Peek() >= 0)
        {
            buffered.Append("\n" + reader.ReadLine());
        }
        Console.Write(buffered);
    }
}
// Candidate: batch lines into a StringBuilder and flush it to the console
// every 10000 lines, plus a final flush for the remainder.
public static void someCallsToWrite()
{
    var stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
    using (var reader = new StreamReader(stream))
    {
        var buffered = new StringBuilder();
        int lineCount = 0;
        int flushEvery = 10000;
        while (reader.Peek() >= 0)
        {
            lineCount++;
            buffered.Append("\n" + reader.ReadLine());
            if (lineCount % flushEvery == 0)
            {
                Console.Write(buffered);
                buffered = new StringBuilder();
            }
        }
        Console.Write(buffered);
    }
}
/// <summary>
/// Reads every line of the shared input file up front, then prints them
/// from parallel workers. Output order is nondeterministic by design.
/// </summary>
public static void inParallel()
{
    string[] lines = File.ReadAllLines(file);
    int lineCount = lines.Length;
    Parallel.For(0, lineCount, index =>
    {
        Console.WriteLine(lines[index]);
    });
}
/// <summary>
/// Parallel variant with thread-local buffering: each worker accumulates
/// its lines into its own StringBuilder, and each completed buffer is
/// flushed to the console with a single Write call.
/// </summary>
public static void inParallelBlocks()
{
    string[] lines = File.ReadAllLines(file);
    int lineCount = lines.Length;
    Parallel.For<StringBuilder>(
        0,
        lineCount,
        () => new StringBuilder(),                          // per-thread init
        (index, loopState, buffer) => buffer.Append("\n" + lines[index]),
        buffer => Console.Write(buffer));                   // per-thread finalize
}
#region iceManMinds
// Reads the entire shared input file into one byte[] using NumberOfThreads
// worker threads per pass, each reading a fixed-size block, then decodes the
// buffer as ASCII and prints it.
// NOTE(review): if a mid-file FileStream.Read returns fewer bytes than
// requested (legal per the API), totalBytesRead can never reach fileLength
// because readPosition has already advanced past the gap — the outer while
// loop would then re-read the tail forever. TODO confirm.
public static void iceManMinds()
{
    string FileName = file;
    long ThreadReadBlockSize = 50000;   // bytes each thread reads per pass
    int NumberOfThreads = 4;
    byte[] _inputString;                // destination buffer for the whole file
    var fi = new FileInfo(FileName);
    long totalBytesRead = 0;
    long fileLength = fi.Length;
    long readPosition = 0L;             // next unassigned file offset
    Console.WriteLine("Reading Lines From {0}", FileName);
    var threads = new Thread[NumberOfThreads];
    var instances = new ReadThread[NumberOfThreads];
    _inputString = new byte[fileLength];
    while (totalBytesRead < fileLength)
    {
        // Launch one pass: each worker is assigned a consecutive block.
        // Note readPosition keeps advancing even past end-of-file; those
        // workers simply read 0 bytes (BlockSize becomes 0 after Read).
        for (int i = 0; i < NumberOfThreads; i++)
        {
            var rt = new ReadThread { StartPosition = readPosition, BlockSize = ThreadReadBlockSize };
            instances[i] = rt;
            threads[i] = new Thread(rt.Read);
            threads[i].Start();
            readPosition += ThreadReadBlockSize;
        }
        // Wait for the whole pass to finish before copying results.
        for (int i = 0; i < NumberOfThreads; i++)
        {
            threads[i].Join();
        }
        // Copy each worker's output into its place in the shared buffer.
        // After Read(), BlockSize holds the byte count actually read.
        for (int i = 0; i < NumberOfThreads; i++)
        {
            if (instances[i].BlockSize > 0)
            {
                Array.Copy(instances[i].Output, 0L, _inputString, instances[i].StartPosition,
                instances[i].BlockSize);
                totalBytesRead += instances[i].BlockSize;
            }
        }
    }
    // NOTE(review): assumes the file is ASCII-encoded — confirm before reuse.
    string finalString = Encoding.ASCII.GetString(_inputString);
    Console.WriteLine(finalString);//.Substring(104250000, 50000));
}
// Worker that reads a single block of the shared input file.
// StartPosition/BlockSize are set by the caller before Read() runs;
// after Read(), BlockSize holds the number of bytes actually read and
// Output holds the data.
private class ReadThread
{
    public long StartPosition { get; set; }
    public long BlockSize { get; set; }
    public byte[] Output { get; private set; }

    // Reads up to BlockSize bytes from the file starting at StartPosition.
    public void Read()
    {
        Output = new byte[BlockSize];
        using (var stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            stream.Seek(StartPosition, SeekOrigin.Begin);
            BlockSize = stream.Read(Output, 0, (int)BlockSize);
        }
    }
}
#endregion
/// <summary>
/// Sanity check for the timing harness: a fixed half-second pause, so the
/// reported average should come out close to 0.5 seconds.
/// </summary>
public static void testTiming()
{
    Thread.Sleep(TimeSpan.FromMilliseconds(500));
}
}
Each of these tests prints the file out to the console.
When run under default Console settings, each test took between 5:30 and 6:10 (Min:Sec).
After considering the Console properties, I set Console.WindowHeight = 1 — that is, only 1 line is shown at a time (you can scroll up and down to see the most recent 100 lines) — and achieved a speed-up.
Currently, the task completes in just a little over 2:40 (Min:Sec) for most methods.
Try it out on your computer and see how it works for you.
Interestingly enough, the different methods were basically equivalent, with the OP's code being basically the fastest.
The timing code warms-up the code then runs it twice and averages the time it takes, it does this for each method.
Feel free to try out your own methods and time them.
The answer to this question really depends on what it is you will be doing with the data. If your intention truly is to just read in the file and dump the contents to the console screen, then it would be better to use the StringBuilder Class to build up a string of, say 1000 lines, then dump the contents to the screen, reset the string then read in another 1000 lines, dump them, etc etc...
However, if you are trying to build something that is part of a larger project and you are using .NET 4.0, you can use the MemoryMappedFile Class to read the file and call its CreateViewAccessor method to create a "window" that operates on just a portion of the data instead of reading in the entire file.
Another option would be to make Threads that read different parts of the file all at once, then puts it all together in the end.
If you can be more specific as to what you plan to do with this data, I can help you more. Hope this helps!
EDIT:
Try this code out. I was able to read the whole list in literally 3 seconds using Threads:
using System;
using System.IO;
using System.Text;
using System.Threading;
namespace ConsoleApplication36
{
    // Reads an entire file into one byte[] using multiple threads, each
    // reading a fixed-size block per pass, then prints part of the result.
    class Program
    {
        // Fixed: the original '#"..."' prefix is not valid C# (it would not
        // compile); a verbatim string literal uses '@'.
        private const string FileName = @"C:\Users\Public\movies.list";
        private const long ThreadReadBlockSize = 50000;   // bytes per thread per pass
        private const int NumberOfThreads = 4;
        private static byte[] _inputString;               // whole-file buffer

        static void Main(string[] args)
        {
            var fi = new FileInfo(FileName);
            long totalBytesRead = 0;
            long fileLength = fi.Length;
            long readPosition = 0L;                       // next unassigned file offset
            Console.WriteLine("Reading Lines From {0}", FileName);
            var threads = new Thread[NumberOfThreads];
            var instances = new ReadThread[NumberOfThreads];
            _inputString = new byte[fileLength];
            while (totalBytesRead < fileLength)
            {
                // Launch one pass: each worker gets a consecutive block.
                for (int i = 0; i < NumberOfThreads; i++)
                {
                    var rt = new ReadThread { StartPosition = readPosition, BlockSize = ThreadReadBlockSize };
                    instances[i] = rt;
                    threads[i] = new Thread(rt.Read);
                    threads[i].Start();
                    readPosition += ThreadReadBlockSize;
                }
                for (int i = 0; i < NumberOfThreads; i++)
                {
                    threads[i].Join();
                }
                // Copy each worker's output into place; after Read(),
                // BlockSize holds the byte count actually read (0 past EOF).
                for (int i = 0; i < NumberOfThreads; i++)
                {
                    if (instances[i].BlockSize > 0)
                    {
                        Array.Copy(instances[i].Output, 0L, _inputString, instances[i].StartPosition,
                            instances[i].BlockSize);
                        totalBytesRead += instances[i].BlockSize;
                    }
                }
            }
            // NOTE(review): assumes ASCII encoding and that the file is large
            // enough for this Substring range — adjust for your own file.
            string finalString = Encoding.ASCII.GetString(_inputString);
            Console.WriteLine(finalString.Substring(104250000, 50000));
        }

        // Worker that reads one block of the file. After Read(), BlockSize
        // holds the number of bytes actually read and Output holds the data.
        private class ReadThread
        {
            public long StartPosition { get; set; }
            public long BlockSize { get; set; }
            public byte[] Output { get; private set; }

            public void Read()
            {
                Output = new byte[BlockSize];
                var inStream = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                inStream.Seek(StartPosition, SeekOrigin.Begin);
                BlockSize = inStream.Read(Output, 0, (int)BlockSize);
                inStream.Close();
            }
        }
    }
}
You will need to change the FileName to match the location of your movies.list file. Also, you can adjust the total number of threads. I used 4, but you can decrease or increase this at will. You can also change the Block Size...this is how much data each thread reads in. Also, I'm assuming it's an ASCII text file. If it's not, you need to change the encoding type to UTF8 or whatever encoding the file is in. Good luck!

Categories

Resources