I am developing an application which downloads videos from a given URL. The problem is that I do not receive the entire file content, then cannot play the file. For example, trying to download a video of size ~2.23 MB, gives me only ~2.11 MB. When I use the URL in a browser it shows me a dialog to save the video and the file is downloaded successfully.
I have tried using WebClient class and it works, but I want to download the file in chunks in order to be able to report the status(percentage completed). Here is the code I use:
private void backgroundWorker_DoWork(object sender, DoWorkEventArgs e)
{
int bufferSize = 1024 * 300;
string filePath = saveFileDialog.FileName;
if (File.Exists(filePath))
File.Delete(filePath);
int totalBytes = 0;
HttpWebRequest webRequest = (HttpWebRequest)HttpWebRequest.Create(DownloadUrl);
long contentLength = webRequest.GetResponse().ContentLength;
using (Stream webStream = webRequest.GetResponse().GetResponseStream())
using (StreamReader reader = new StreamReader(webStream))
using (BinaryWriter fileWriter = new BinaryWriter(File.Create(filePath)))
{
do
{
char[] buffer = new char[bufferSize];
bytesRead = reader.ReadBlock(buffer, 0, bufferSize); // also tried with Read(buffer, 0, bufferSize);
totalBytes += bytesRead;
Console.WriteLine("Bytes read: " + bytesRead + " Total Bytes: " + totalBytes + " Content length: " + contentLength);
if (bytesRead > 0)
fileWriter.Write(buffer, 0, bytesRead);
} while (!reader.EndOfStream);
}
}
I have also tried to read until bytesRead = 0, but it's the same result.
Is there something that I am missing?
I would recommend that you use DownloadFileAsync instead. This provides you with two events that makes it much easier to track the progress of your download.
DownloadProgressChangedEventHandler
DownloadFileCompleted
A very simple implementation would look something like this.
WebClient client = new WebClient();
client.DownloadProgressChanged +=
new DownloadProgressChangedEventHandler(DownloadProgressCallback);
client.DownloadFileAsync(DownloadUrl, filePath);
Then have a function that updates your progress bar.
private void DownloadProgressCallback(object sender,
DownloadProgressChangedEventArgs e)
{
myProgressBar.Value = e.ProgressPercentage;
}
You also have access to data like e.BytesReceived and e.TotalBytesToReceive.
Edit:
The main change I had to do was to change your buffer from char[] to byte[], and then use a Stream instead of a StreamReader.
We also check the end of file by checking to see if there are any bytes to write to our hard-drive. If there are none left to write, we know that we are done.
private static void download()
{
int bufferSize = 1024 * 300;
string filePath = "Test.exe";
if (File.Exists(filePath))
File.Delete(filePath);
int totalBytes = 0;
HttpWebRequest webRequest =
(HttpWebRequest)
HttpWebRequest.Create(
#"http://www.rarlab.com/rar/wrar420.exe");
long contentLength = webRequest.GetResponse().ContentLength;
Console.WriteLine(totalBytes);
using (WebResponse webResponse = webRequest.GetResponse())
using (Stream reader = webResponse.GetResponseStream())
using (BinaryWriter fileWriter = new BinaryWriter(File.Create(filePath)))
{
int bytesRead = 0;
byte[] buffer = new byte[bufferSize];
do
{
bytesRead = reader.Read(buffer, 0, buffer.Length);
totalBytes += bytesRead;
fileWriter.Write(buffer, 0, bytesRead);
Console.WriteLine("BytesRead: " + bytesRead + " -- TotalBytes: " + totalBytes);
} while (bytesRead > 0);
}
}
Try
private void backgroundWorker_DoWork(object sender, DoWorkEventArgs e)
{
int bufferSize = 1024 * 300;
string filePath = saveFileDialog.FileName;
if (File.Exists(filePath))
File.Delete(filePath);
int totalBytes = 0;
HttpWebRequest webRequest = (HttpWebRequest)HttpWebRequest.Create(DownloadUrl);
long contentLength = webRequest.GetResponse().ContentLength;
using (Stream webStream = webRequest.GetResponse().GetResponseStream())
using (StreamReader reader = new StreamReader(webStream))
using (BinaryWriter fileWriter = new BinaryWriter(File.Create(filePath)))
{
do
{
char[] buffer = new char[bufferSize];
bytesRead = reader.ReadBlock(buffer, 0, bufferSize); // also tried with Read(buffer, 0, bufferSize);
totalBytes += bytesRead;
Console.WriteLine("Bytes read: " + bytesRead + " Total Bytes: " + totalBytes + " Content length: " + contentLength);
if (bytesRead > 0)
fileWriter.Write(buffer, 0, bytesRead);
} while (!reader.EndOfStream);
fileWriter.flush();
}
}
Hi, that was my bad, I just added the fileWriter.flush(); outside the loop
Related
I have created some avro files. I can use the following commands to convert them to json, just to check whether the files are ok
java -jar avro-tools-1.8.2.jar tojson FileName.avro>outputfilename.json
Now, I have some big avro files and the REST API I m trying to upload to, has size limitations and thus I am trying to upload it in chunks using streams.
The following sample, which just reads from the original file in chunks and copies to another avro file, creates the file perfectly
using System;
using System.IO;
class Test
{
public static void Main()
{
// Specify a file to read from and to create.
string pathSource = #"D:\BDS\AVRO\filename.avro";
string pathNew = #"D:\BDS\AVRO\test\filenamenew.avro";
try
{
using (FileStream fsSource = new FileStream(pathSource,
FileMode.Open, FileAccess.Read))
{
byte[] buffer = new byte[(20 * 1024 * 1024) + 100];
long numBytesToRead = (int)fsSource.Length;
int numBytesRead = 0;
using (FileStream fsNew = new FileStream(pathNew,
FileMode.Append, FileAccess.Write))
{
// Read the source file into a byte array.
//byte[] bytes = new byte[fsSource.Length];
//int numBytesToRead = (int)fsSource.Length;
//int numBytesRead = 0;
while (numBytesToRead > 0)
{
int bytesRead = fsSource.Read(buffer, 0, buffer.Length);
byte[] actualbytes = new byte[bytesRead];
Array.Copy(buffer, actualbytes, bytesRead);
// Read may return anything from 0 to numBytesToRead.
// Break when the end of the file is reached.
if (bytesRead == 0)
break;
numBytesRead += bytesRead;
numBytesToRead -= bytesRead;
fsNew.Write(actualbytes, 0, actualbytes.Length);
}
}
}
// Write the byte array to the other FileStream.
}
catch (FileNotFoundException ioEx)
{
Console.WriteLine(ioEx.Message);
}
}
}
How do I know this creates a ok avro. Because the earlier command to convert to json, again works i.e.
java -jar avro-tools-1.8.2.jar tojson filenamenew.avro>outputfilename.json
However, when I use the same code, but instead of copying to another file, just call a rest api, the file gets uploaded but upon downloading the same file from the server and running the command above to convert to json says - "Not a Data file".
So, obviously something is getting corrupted and I am struggling to figure out what.
This is the snippet
string filenamefullyqualified = path + filename;
Stream stream = System.IO.File.Open(filenamefullyqualified, FileMode.Open, FileAccess.Read, FileShare.None);
long? position = 0;
byte[] buffer = new byte[(20 * 1024 * 1024) + 100];
long numBytesToRead = stream.Length;
int numBytesRead = 0;
do
{
var content = new MultipartFormDataContent();
int bytesRead = stream.Read(buffer, 0, buffer.Length);
byte[] actualbytes = new byte[bytesRead];
Array.Copy(buffer, actualbytes, bytesRead);
if (bytesRead == 0)
break;
//Append Data
url = String.Format("https://{0}.dfs.core.windows.net/raw/datawarehouse/{1}/{2}/{3}/{4}/{5}?action=append&position={6}", datalakeName, filename.Substring(0, filename.IndexOf("_")), year, month, day, filename, position.ToString());
numBytesRead += bytesRead;
numBytesToRead -= bytesRead;
ByteArrayContent byteContent = new ByteArrayContent(actualbytes);
content.Add(byteContent);
method = new HttpMethod("PATCH");
request = new HttpRequestMessage(method, url)
{
Content = content
};
request.Headers.Add("Authorization", "Bearer " + accesstoken);
var response = await client.SendAsync(request);
response.EnsureSuccessStatusCode();
position = position + request.Content.Headers.ContentLength;
Array.Clear(buffer, 0, buffer.Length);
} while (numBytesToRead > 0);
stream.Close();
I have looked through the forum threads but haven't come across anything which deals with splitting of avro files.
I have a hunch that my "content" for the http request isn't right. what is it that I am missing?
If you need more details, I will be happy to provide.
I have found the problem now. The problem was because of MultipartFormDataContent. When an avro file is uploaded with that, it adds extra text like content Type etc, along with removal of many lines (I do not know why).
So, the solution was to upload the contents as "ByteArrayContent" itself and not add it to MultipartFormDataContent like I was doing earlier.
Here is the snippet, almost similar to the one in the question, except that I no longer use MultipartFormDataContent
string filenamefullyqualified = path + filename;
Stream stream = System.IO.File.Open(filenamefullyqualified, FileMode.Open, FileAccess.Read, FileShare.None);
//content.Add(CreateFileContent(fs, path, filename, "text/plain"));
long? position = 0;
byte[] buffer = new byte[(20 * 1024 * 1024) + 100];
long numBytesToRead = stream.Length;
int numBytesRead = 0;
//while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0)
//{
do
{
//var content = new MultipartFormDataContent();
int bytesRead = stream.Read(buffer, 0, buffer.Length);
byte[] actualbytes = new byte[bytesRead];
Array.Copy(buffer, actualbytes, bytesRead);
if (bytesRead == 0)
break;
//Append Data
url = String.Format("https://{0}.dfs.core.windows.net/raw/datawarehouse/{1}/{2}/{3}/{4}/{5}?action=append&position={6}", datalakeName, filename.Substring(0, filename.IndexOf("_")), year, month, day, filename, position.ToString());
numBytesRead += bytesRead;
numBytesToRead -= bytesRead;
ByteArrayContent byteContent = new ByteArrayContent(actualbytes);
//byteContent.Headers.ContentType= new MediaTypeHeaderValue("text/plain");
//content.Add(byteContent);
method = new HttpMethod("PATCH");
//request = new HttpRequestMessage(method, url)
//{
// Content = content
//};
request = new HttpRequestMessage(method, url)
{
Content = byteContent
};
request.Headers.Add("Authorization", "Bearer " + accesstoken);
var response = await client.SendAsync(request);
response.EnsureSuccessStatusCode();
position = position + request.Content.Headers.ContentLength;
Array.Clear(buffer, 0, buffer.Length);
} while (numBytesToRead > 0);
stream.Close();
But the streaming by record will not be able to handle the AVRO file as a whole in a transaction. We may end up in partial success, if some records fail, for example.
If we have a small tool that can split AVRO files based on a threshold number of records, it will be great.
The spark-based split by partition technique does allow to split data set to a pre-defined number of files; but, it does not allow splitting based on the number of records. I.e., I do not want an AVRO file with more than 500 records.
So we have to devise a batching logic based on the comfortable heap size the application can handle along with a two-phase commit, to handle transactions
I'm using HttpWebRequest to upload files to the server. The request sends 2 files to the server, a video file and an image file. I'm trying to track the progress of entire progress but the issue is, the progress log runs separately for each file upload. I want the progress to show only once for both the uploads but I can't quite figure out how to do it. Here's my client side code:
Dictionary<string, string> fields = new Dictionary<string, string>();
fields.Add("username", username);
HttpWebRequest hr = WebRequest.Create(url) as HttpWebRequest;
hr.Timeout = 500000;
string bound = "----------------------------" + DateTime.Now.Ticks.ToString("x");
hr.ContentType = "multipart/form-data; boundary=" + bound;
hr.Method = "POST";
hr.KeepAlive = true;
hr.Credentials = CredentialCache.DefaultCredentials;
byte[] boundBytes = Encoding.ASCII.GetBytes("\r\n--" + bound + "\r\n");
string formDataTemplate = "\r\n--" + bound + "\r\nContent-Disposition: form-data; name=\"{0}\";\r\n\r\n{1}";
Stream s = hr.GetRequestStreamWithTimeout(1000000);
foreach (string key in fields.Keys)
{
byte[] formItemBytes = Encoding.UTF8.GetBytes(
string.Format(formDataTemplate, key, fields[key]));
s.Write(formItemBytes, 0, formItemBytes.Length);
}
s.Write(boundBytes, 0, boundBytes.Length);
string headerTemplate =
"Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"\r\n Content-Type: application/octet-stream\r\n\r\n";
List<string> files = new List<string> { fileUrl, thumbUrl };
List<string> type = new List<string> { "video", "thumb" };
int count = 0;
foreach (string f in files)
{
var m = Path.GetFileName(f);
var t = type[count];
var j = string.Format(headerTemplate, t, m);
byte[] headerBytes = Encoding.UTF8.GetBytes(
string.Format(headerTemplate, type[count], Path.GetFileName(f)));
s.Write(headerBytes, 0, headerBytes.Length);
FileStream fs = new FileStream(f, FileMode.Open, FileAccess.Read);
int bytesRead = 0;
long bytesSoFar = 0;
byte[] buffer = new byte[1024];
while ((bytesRead = fs.Read(buffer, 0, buffer.Length)) != 0)
{
bytesSoFar += bytesRead;
s.Write(buffer, 0, buffer.Length);
Console.WriteLine(string.Format("sending file data {0:0.000}%", (bytesSoFar * 100.0f) / fs.Length));
}
s.Write(boundBytes, 0, boundBytes.Length);
fs.Close();
count += 1;
}
s.Close();
string respString = "";
hr.BeginGetResponse((IAsyncResult res) =>
{
WebResponse resp = ((HttpWebRequest)res.AsyncState).EndGetResponse(res);
StreamReader respReader = new StreamReader(resp.GetResponseStream());
respString = respReader.ReadToEnd();
resp.Close();
resp = null;
}, hr);
while (!hr.HaveResponse)
{
Console.Write("hiya bob!");
Thread.Sleep(150);
}
Console.Write(respString);
hr = null;
How do I combine the progress log for both uploads into a single log? Any help is appreciated.
One option is to calculate the total number of bytes you need to send before doing any work:
// Calculate the total size to upload before starting work
long totalToUpload = 0;
foreach (var f in files)
{
totalToUpload += (new FileInfo(f)).Length;
}
Then keep track of the total number of bytes sent in any file, and use that in your calculation of progress:
int count = 0;
long bytesSoFar = 0;
foreach (string f in files)
{
// ... Your existing work ...
while ((bytesRead = fs.Read(buffer, 0, buffer.Length)) != 0)
{
bytesSoFar += bytesRead;
// Make sure to only write the number of bytes read from the file
s.Write(buffer, 0, bytesRead);
// Console.WriteLine takes a string.Format() style string
Console.WriteLine("sending file data {0:0.000}%", (bytesSoFar * 100.0f) / totalToUpload);
}
I need help to improve this code but I do not understand where I made a mistake. The file is downloaded but in a corrupted format. I am using cookies also which is a required part of this method.
/*Downlod task */
public static async Task<int> CreateDownloadTask(string urlToDownload,string sDestinationPath, string cookiedstr)
{
int BufferSize = 4096;
int receivedBytes = 0;
int totalBytes = 0;
WebClient client = new WebClient();
client.Headers.Add(HttpRequestHeader.Cookie, cookiedstr);
using (var stream = await client.OpenReadTaskAsync(urlToDownload))
{
using (MemoryStream ms = new MemoryStream())
{
int read = 0;
totalBytes = Int32.Parse(client.ResponseHeaders[HttpResponseHeader.ContentLength]);
var buffer = new byte[BufferSize];
while ((read = await stream.ReadAsync(buffer, 0, buffer.Length)) > 0)
{
ms.Write(buffer, 0, read);
FileStream file = new FileStream(sDestinationPath, FileMode.Append, FileAccess.Write);
ms.Position = 0;
ms.WriteTo(file);
file.Flush();
file.Close();
buffer = new byte[BufferSize];
receivedBytes += read;
Console.WriteLine(receivedBytes + " " + totalBytes);
// DownloadBytesProgress args = new DownloadBytesProgress(urlToDownload, receivedBytes, totalBytes);
}
ms.Close();
}
return receivedBytes;
}
}
My requirement is to transfer a zip file of size 400MB or more; The following code works for at least 40MB; But for more I would have to change byte[] bytes = new byte[50000000]; to byte[] bytes = new byte[400000000]; and maxRequestLength to maxRequestLength="409600";
The problem is byte[] bytes = new byte[100000000]; returns an error regarding insufficient space. So how can I transfer large files using WebClient??
WebClient client = new WebClient();
client.AllowWriteStreamBuffering = true;
UriBuilder ub = new UriBuilder("http://localhost:57596/UploadImages.ashx");
ub.Query = "ImageName=" + "DataSet" + DataSetId + ".zip";
client.OpenWriteCompleted += (InputStream, eArguments) =>
{
try
{
using (Stream output = eArguments.Result)
{
output.Write(ImagesAux, 0, (int)ImagesAux.Length);
//numeroimagem++;
}
}
catch (Exception ex)
{
MessageBox.Show(ex.Message);
//throw;
}
};
client.OpenWriteAsync(ub.Uri);
in UploadImages.ashx
public void ProcessRequest(HttpContext context)
{
//context.Response.ContentType = "text/plain";
//context.Response.Write("Hello World");
string ImageName = context.Request.QueryString["ImageName"];
string UploadPath = context.Server.MapPath("~/ServerImages/");
using (FileStream stream = File.Create(UploadPath + ImageName))
{
byte[] bytes = new byte[50000000]; //
int bytesToRead = 0;
while ((bytesToRead =
context.Request.InputStream.Read(bytes, 0, bytes.Length)) != 0)
{
stream.Write(bytes, 0, bytesToRead);
stream.Close();
}
}
}
in Web.config
<httpRuntime targetFramework="4.5" maxRequestLength="40960"/>
You should never load everything in memory then write all back to disk, but instead you should load pieces and write them to disk while you are reading them.
When you've done reading you close the stream you are writing to.
Otherwise as soon as you reach sizes as GB you can get an OutOfMemory really quick.
So I would change the writing bytes to disk from this:
using (FileStream stream = File.Create(UploadPath + ImageName))
{
byte[] bytes = new byte[50000000]; //
int bytesToRead = 0;
while ((bytesToRead = context.Request.InputStream.Read(bytes, 0, bytes.Length)) != 0)
{
stream.Write(bytes, 0, bytesToRead);
stream.Close();
}
}
to this:
using (FileStream stream = File.Create(UploadPath + ImageName))
{
byte[] bytes = new byte[1024];
long totalBytes = context.Request.InputStream.Length;
long bytesRead = 0;
int bytesToRead = bytes.Length;
if (totalBytes - bytesRead < bytes.Length)
bytesToRead = (int)(totalBytes - bytesRead);
bytes = new byte[bytesToRead];
while ((bytesToRead = context.Request.InputStream.Read(bytes, bytesRead, bytes.Length)) != 0)
{
stream.Write(bytes, bytesRead, bytes.Length);
bytesRead += bytes.Length;
if (totalBytes - bytesRead < bytes.Length)
bytesToRead = (int)(totalBytes - bytesRead);
bytes = new byte[bytesToRead];
}
stream.Close();
}
1024 would be the buffer size.
I try to Zip some .pdf files in C#. My code works fine but when the size of one of the pdfs is big, it is going to overwrite that pdf on the rest of pdfs. I am not sure what is happening. I tried to increase the size of buffer or zip file but still same issue. Do you have any suggestion?
This is my code:
public void ProcessZipRequest(string strQueueID, string strBatchID, string strFtpPath)
{
int intReportCnt = 0;
string strZipFileName = "Order-" + strBatchID + "-" + strQueueID + "-" + DateTime.Now.ToString("MM-dd-yyyy-HH-mm") + ".zip";
strZipFileName = SafeFileName(strZipFileName);
//MemoryStream ms = new MemoryStream();
FileStream ms = new FileStream(#"c:\surf\nikoo.zip", FileMode.Create);
ZipOutputStream oZipStream = new ZipOutputStream(ms); // create zip stream
oZipStream.SetLevel(9); // maximum compression
intReportCnt += 1;
string strRptFilename=string.Empty;
MemoryStream outputStream = new MemoryStream();
if (strQueueID != null)
{
String[] filenames = Directory.GetFiles(#"C:\uploaded");
// setting Report name to path given for Report name
foreach (String filename in filenames)
{
strRptFilename = filename.Substring(filename.LastIndexOf("\\") + 1);
FileStream fs = File.OpenRead(#"C:\uploaded\" + strRptFilename);
int bufferSize = 2048;
int readCount;
byte[] buffer = new byte[bufferSize];
readCount = fs.Read(buffer, 0, bufferSize);
while (readCount>0)
{
outputStream.Write(buffer, 0, readCount);
readCount = fs.Read(buffer, 0, bufferSize);
}
fs.Close();
outputStream.Position = 0;
ZipFile(ref outputStream, strRptFilename, ref oZipStream);
}
}
outputStream.Close();
oZipStream.Finish();
oZipStream.Flush();
oZipStream.IsStreamOwner = false; // False stops the Close also Closing the underlying stream.
oZipStream.Close(); // Must finish the ZipOutputStream before using outputMemStream.
ms.Close();
}
And this is Zipfile Method:
public void ZipFile(ref MemoryStream msFile, string strFilename, ref ZipOutputStream oZipStream)
{
ZipEntry oZipEntry = new ZipEntry(strFilename);
oZipEntry.DateTime = DateTime.Now;
oZipEntry.Size = msFile.Length;
oZipStream.PutNextEntry(oZipEntry);
StreamUtils.Copy(msFile, oZipStream, new byte[4096]);
oZipStream.CloseEntry();
}
I found the issue. I have to create a new MemoyStream in for loop and close it at the end of the loop.
foreach (String filename in filenames)
{
strRptFilename = filename.Substring(filename.LastIndexOf("\\") + 1);
outputStream = new MemoryStream();
FileStream fs = File.OpenRead(#"C:\uploaded\" + strRptFilename);
int bufferSize = 2048;
int readCount;
byte[] buffer = new byte[bufferSize];
readCount = fs.Read(buffer, 0, bufferSize);
while (readCount>0)
{
outputStream.Write(buffer, 0, readCount);
readCount = fs.Read(buffer, 0, bufferSize);
}
fs.Close();
outputStream.Position = 0;
ZipFile(ref outputStream, strRptFilename, ref oZipStream);
fs.Close();
outputStream.Close();
}