Split a large data file into multiple files with complete rows - C#

I want to split large data file (5 GB) into multiple files (5 files of 1 GB).
I am using this code:-
// Splits sourceFileName into numbered part files ("<name>_1<ext>", "<name>_2<ext>", ...)
// written to destFileLocation. The split is made purely on byte counts, so a part can
// end in the middle of a row.
string destFileLocation = @"C:\Users\";
int index = 0;
long maxFileSize = 1073741824; // upper bound for one part, in bytes
byte[] buffer = new byte[65536];
using (Stream source = File.OpenRead(sourceFileName))
{
    // One pass of the outer loop produces one part file.
    while (source.Position < source.Length)
    {
        index++;
        string newFileName = Path.Combine(destFileLocation, Path.GetFileNameWithoutExtension(sourceFileName));
        newFileName += "_" + index.ToString() + Path.GetExtension(sourceFileName);
        // The using statements dispose both streams on every exit path, so no
        // extra try/finally with Dispose()/Close() is required.
        using (Stream destination = File.OpenWrite(newFileName))
        {
            while (destination.Position < maxFileSize)
            {
                int bytes = source.Read(buffer, 0, (int)Math.Min(maxFileSize, buffer.Length));
                destination.Write(buffer, 0, bytes);
                // A short read is taken as the end of the source.
                if (bytes < Math.Min(maxFileSize, buffer.Length))
                {
                    break;
                }
            }
        }
    }
}
Now the files are split in the middle of rows, but we need every file to contain only complete rows.
Please provide some suggestions.

Related

Splitting of text file not working properly in c#

I have requirement of writing to text file.
If the file size exceeds 700MB, create new file & write to it.
I am currently writing "|"-delimited data from the database to a file, then checking the file size and splitting the file into multiple files, but the file is split in the middle of a line.
It should write up to the end of the line, or start that particular line in the new file.
I also need to write the column names as the first line of each newly split file.
I am new to C#; could you please suggest a solution with sample code?
Please find below code to splitting the file
/// <summary>
/// Copies the given file into consecutive part files placed in the same folder.
/// Parts are named after the source with a running number (starting at 1) inserted
/// before the extension, and each part is filled in 64 KB chunks until it reaches
/// 700 MB or the source runs out. The cut is made on byte counts only.
/// </summary>
/// <param name="fileNames">Path of the file to split.</param>
private static void ReadWriteToFile(string fileNames)
{
    const long partLimit = 700 * 1024 * 1024;
    var chunk = new byte[65536];

    string folder = Path.GetDirectoryName(fileNames);
    string stem = Path.GetFileNameWithoutExtension(fileNames);
    string suffix = Path.GetExtension(fileNames);

    using (Stream input = File.OpenRead(fileNames))
    {
        for (int part = 1; input.Position < input.Length; part++)
        {
            string partPath = Path.Combine(folder, stem) + part.ToString() + suffix;
            using (Stream output = File.OpenWrite(partPath))
            {
                int requested = (int)Math.Min(partLimit, chunk.Length);
                int received;
                do
                {
                    received = input.Read(chunk, 0, requested);
                    output.Write(chunk, 0, received);
                }
                // Stop on a short read or once the part has reached its limit.
                while (received >= requested && output.Position < partLimit);
            }
        }
    }
}
Thanks in advance.
Could you please let me know if there is any alternative best way to do this
Try this; it is a rewrite of a line-based file splitter I wrote when I was starting out with C#.
(You only have to add the column header as a string at the beginning of each new file.)
/// <summary>
/// Splits a text file into numbered part files without ever cutting a line in two.
/// </summary>
/// <remarks>
/// Parts are written next to the source as <c>name_0.ext</c>, <c>name_1.ext</c>, and so on.
/// A new part is started before a line that would push the current part past the limit,
/// so a part only exceeds the limit when a single line is larger than the limit itself.
/// Sizes are measured in the encoding the parts are written with, including the line
/// terminator. I/O failures are reported on standard error rather than thrown.
/// </remarks>
/// <param name="splitAfterMBytes">Maximum size of one part in megabytes; must be positive.</param>
/// <param name="filename">Path of the file to split.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="splitAfterMBytes"/> is zero or negative.</exception>
/// <exception cref="ArgumentException"><paramref name="filename"/> is null or empty.</exception>
private static void SplitAfterMBytes(int splitAfterMBytes, string filename)
{
    if (splitAfterMBytes <= 0)
    {
        throw new ArgumentOutOfRangeException("splitAfterMBytes", "The part size must be at least 1 MB.");
    }
    if (string.IsNullOrEmpty(filename))
    {
        throw new ArgumentException("A source file name is required.", "filename");
    }

    // long arithmetic: the int product overflows from 2048 MB upwards.
    long maxFileSize = splitAfterMBytes * 1048576L;

    // Path helpers cope with names that have no extension and with dots in folder names.
    string directory = Path.GetDirectoryName(filename) ?? string.Empty;
    string baseName = Path.GetFileNameWithoutExtension(filename);
    string extension = Path.GetExtension(filename);

    int fileCount = 0;
    StreamWriter writer = null;
    try
    {
        using (var reader = new StreamReader(filename))
        {
            // The first part is created only after the source opened successfully.
            writer = new StreamWriter(Path.Combine(directory, baseName + "_" + fileCount + extension));
            fileCount++;

            int newLineBytes = writer.Encoding.GetByteCount(writer.NewLine);
            long byteCount = 0;

            for (string str; (str = reader.ReadLine()) != null;)
            {
                long lineBytes = writer.Encoding.GetByteCount(str) + newLineBytes;

                // Roll over before writing, and only when the current part already holds
                // data; this keeps lines whole and never leaves an empty trailing part.
                if (byteCount > 0 && byteCount + lineBytes > maxFileSize)
                {
                    writer.Dispose();
                    writer = new StreamWriter(Path.Combine(directory, baseName + "_" + fileCount + extension));
                    fileCount++;
                    byteCount = 0;
                }

                writer.WriteLine(str);
                byteCount += lineBytes;
            }
        }
    }
    catch (IOException ex)
    {
        Console.Error.WriteLine("Splitting '" + filename + "' failed: " + ex.Message);
    }
    catch (UnauthorizedAccessException ex)
    {
        Console.Error.WriteLine("Splitting '" + filename + "' failed: " + ex.Message);
    }
    finally
    {
        // writer is still null when opening the source or the first part failed.
        if (writer != null)
        {
            writer.Dispose();
        }
    }
}

Character Limit in filepath C#

I have the following piece of code which uploads a file and checks its validity.
First issue:
if (RadUpload1.UploadedFiles.Count == 0)
{
Session[AppConstants.ERROR_MESSAGE] = ErrorsList.GetErrorMessage(
ErrorsList.ERR_P_DATE_FILE_VALID);
}
else
{
if (RadUpload1.UploadedFiles.Count > 0)
{
foreach (UploadedFile validFile in RadUpload1.UploadedFiles)
{
FileInfo fi = new FileInfo(validFile.FileName);
Stream fs = validFile.InputStream;
IDbContextualRecord pFile = statusContext.CreateAndAddRecordForInsert(PartStoredFile.t_name);
pFile[PartStoredFile.c_partId] = _part[Part.c_id];
string targetFolder = AppSession.Current.ConfigParameters[AppConstants.UPLOAD_FILE_PATH] +
"\\partRec\\" + _part[Part.c_id] + "\\" + pFile[PartStoredFile.c_id];
long bytesOnTheStream = 0L;
try
{
DirectoryInfo dir = new DirectoryInfo(targetFolder);
if (dir.Exists == false)
dir.Create();
string fullFileName = Path.Combine(targetFolder, fi.Name);
Stream targetStream = File.OpenWrite(fullFileName);
byte[] buffer = new Byte[AppConstants.BUFF_SIZE];
int bytesRead;
// while the read method returns bytes
// keep writing them to the output stream
while ((bytesRead = fs.Read(buffer, 0, AppConstants.BUFF_SIZE)) > 0)
{
targetStream.Write(buffer, 0, bytesRead);
bytesOnTheStream += bytesRead;
}
fs.Close();
targetStream.Close();
}
catch (Exception ex)
{
throw ex;
}
What I want to do is to check if the number of characters in the filepath name exceeds 260 to display me a message of error.
This is the second issue after the modification was made:
if (RadUpload1.UploadedFiles.Count <= 0)
{
Session[AppConstants.ERROR_MESSAGE] = ErrorsList.GetErrorMessage(
ErrorsList.ERR_P_DATE_FILE_VALID);
}
else
{
if (RadUpload1.UploadedFiles.Count > 0 )
{
foreach (UploadedFile validFile in RadUpload1.UploadedFiles)
{
pomDoc = (IDbContextualRecord)Session[AppConstants.POM_DOCUMENT_NEW];
FileInfo fi = new FileInfo(validFile.FileName);
Stream fs = validFile.InputStream;
IDbContextualRecord pomFile = pomContext.CreateAndAddRecordForInsert(PomFile.t_name);
pomFile[PomFile.c_pomDocumentId] = pomDoc[PomDocument.c_id];
string targetFolder = AppSession.Current.ConfigParameters[AppConstants.UPLOAD_FILE_PATH] + "\\POM\\" + pomDoc[PomDocument.c_id] + "\\" + pomFile[PomFile.c_id];
long bytesOnTheStream = 0L;
try
{
DirectoryInfo dir = new DirectoryInfo(targetFolder);
if (dir.Exists == false)
dir.Create();
string fullFileName = Path.Combine(targetFolder, fi.Name);
if (fullFileName.Length > 260)
{
throw new Exception(string.Format("The filename is too long!",fullFileName));
}
Stream targetStream = File.OpenWrite(fullFileName);
byte[] buffer = new Byte[AppConstants.BUFF_SIZE];
int bytesRead;
// while the read method returns bytes
// keep writing them to the output stream
while ((bytesRead = fs.Read(buffer, 0, AppConstants.BUFF_SIZE)) > 0)
{
targetStream.Write(buffer, 0, bytesRead);
bytesOnTheStream += bytesRead;
}
fs.Close();
targetStream.Close();
}
catch (Exception ex)
{
throw ;
}
You just have to compare fullFileName.Length to 260 and raise an exception if needed:
if (RadUpload1.UploadedFiles.Count <= 0) // Changed the condition to remove the check within the else block
{
Session[AppConstants.ERROR_MESSAGE] = ErrorsList.GetErrorMessage(
ErrorsList.ERR_P_DATE_FILE_VALID);
}
else
{
foreach (UploadedFile validFile in RadUpload1.UploadedFiles)
{
FileInfo fi = new FileInfo(validFile.FileName);
Stream fs = validFile.InputStream;
IDbContextualRecord pFile = statusContext.CreateAndAddRecordForInsert(PartStoredFile.t_name);
pFile[PartStoredFile.c_partId] = _part[Part.c_id];
string targetFolder = AppSession.Current.ConfigParameters[AppConstants.UPLOAD_FILE_PATH] +
"\\partRec\\" + _part[Part.c_id] + "\\" + pFile[PartStoredFile.c_id];
long bytesOnTheStream = 0L;
try
{
DirectoryInfo dir = new DirectoryInfo(targetFolder);
if (dir.Exists == false)
dir.Create();
string fullFileName = Path.Combine(targetFolder, fi.Name);
if(fullFileName.Length > 260)
{
throw new Exception(string.Format("The filename {0} is too long.", fullFileName));
// Or do whatever you want
}
Stream targetStream = File.OpenWrite(fullFileName);
byte[] buffer = new Byte[AppConstants.BUFF_SIZE];
int bytesRead;
// while the read method returns bytes
// keep writing them to the output stream
while ((bytesRead = fs.Read(buffer, 0, AppConstants.BUFF_SIZE)) > 0)
{
targetStream.Write(buffer, 0, bytesRead);
bytesOnTheStream += bytesRead;
}
fs.Close();
targetStream.Close();
}
catch (Exception ex)
{
throw;
}
Also, you don't want to throw ex; but rather throw; or this will reset the stacktrace, see Is there a difference between "throw" and "throw ex"?

How to split file into parts and download

I'm working on a split downloader for c#. It is downloading fine (so the logic is working) but the problem is that whatever file it downloads it corrupts. I have no idea on how to fix it. Here's the code:
/// <summary>
/// Concatenates every downloaded part listed in <c>Files</c>, in list order, into
/// "output.jpg", then deletes the parts and empties the list.
/// </summary>
private void mergeClean()
{
    using (var output = File.Create("output.jpg"))
    {
        foreach (var file in Files)
        {
            using (var input = File.OpenRead(file))
            {
                // Stream.CopyTo replaces the hand-written read/write loop.
                input.CopyTo(output);
            }
        }
    }
    foreach (var file in Files)
    {
        File.Delete(file);
    }
    // Forget the deleted parts; otherwise a later merge would try to open them again.
    Files.Clear();
}
/// <summary>
/// Writes the remaining content of <paramref name="stream"/> to a new file at
/// <paramref name="path"/>, replacing any existing file.
/// </summary>
/// <param name="path">Destination file path.</param>
/// <param name="stream">Source stream; it is read to its end but not closed here.</param>
private void SaveFileStream(String path, Stream stream)
{
    // using releases the file handle even when CopyTo throws part-way through.
    using (var fileStream = new FileStream(path, FileMode.Create, FileAccess.Write))
    {
        stream.CopyTo(fileStream);
    }
}
/// <summary>
/// Downloads <paramref name="URL"/> in up to ten consecutive byte ranges and merges
/// the parts into the final file.
/// </summary>
/// <remarks>
/// The ranges handed to <c>Download</c> are inclusive on both ends and never overlap:
/// each part starts one byte after the previous part ended, and the last part stops at
/// the final byte of the resource. Parts are fetched one after another on the calling
/// thread, so a failed part surfaces as an exception to the caller.
/// </remarks>
/// <param name="URL">Address of the resource to download.</param>
/// <exception cref="NotSupportedException">
/// The server reports no usable Content-Length, or the resource is too large for the
/// int offsets that <c>Download</c> accepts.
/// </exception>
public void SplitDownload(string URL)
{
    const int partCount = 10;

    long totalLength;
    System.Net.WebRequest req = System.Net.WebRequest.Create(URL);
    req.Method = "HEAD";
    using (System.Net.WebResponse resp = req.GetResponse())
    {
        totalLength = resp.ContentLength;
    }

    if (totalLength <= 0 || totalLength > int.MaxValue)
    {
        throw new NotSupportedException("Cannot split a download of " + totalLength + " bytes.");
    }

    // Ceiling division in long arithmetic so the sum cannot overflow.
    long partSize = (totalLength + partCount - 1) / partCount;

    for (long start = 0; start < totalLength; start += partSize)
    {
        // Inclusive end offset, clamped to the last byte of the resource.
        long end = Math.Min(start + partSize, totalLength) - 1;
        Download(URL, (int)start, (int)end);
    }
    mergeClean();
}
/// <summary>
/// Fetches one byte range of <paramref name="URL"/> into a temporary file and
/// records that file in <c>Files</c>.
/// </summary>
/// <param name="URL">Address of the resource.</param>
/// <param name="Start">Offset of the first byte to request.</param>
/// <param name="End">Offset of the last byte to request (inclusive, as in the HTTP Range header).</param>
/// <exception cref="InvalidOperationException">The server did not answer with a partial response.</exception>
private void Download(string URL, int Start, int End)
{
    Console.WriteLine(String.Format("{0},{1}", Start, End));
    HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(URL);
    myHttpWebRequest.AddRange(Start, End);

    // Both the response and its stream hold the connection; dispose them on every path.
    using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
    {
        // A server that ignores the Range header returns the whole body with 200 OK;
        // merging such parts would produce a corrupt file.
        if (myHttpWebResponse.StatusCode != HttpStatusCode.PartialContent)
        {
            throw new InvalidOperationException("The server did not honor the range request for " + URL + ".");
        }

        using (Stream streamResponse = myHttpWebResponse.GetResponseStream())
        {
            String name = GenerateTempName();
            SaveFileStream(name, streamResponse);
            Files.Add(name);
        }
    }
}
Here is an example of what it does:
UPDATED CODE:
/// <summary>
/// Builds the temporary file name for the part that starts at byte offset
/// <paramref name="start"/>.
/// </summary>
/// <remarks>
/// The offset is zero-padded to ten digits, the width of the largest int, so that
/// sorting the names as text always matches the numeric order of the offsets.
/// Six digits stop sorting correctly once an offset reaches 1,000,000.
/// </remarks>
/// <param name="start">Byte offset at which the part begins.</param>
/// <returns>A name of the form "0000000000.tmp".</returns>
static string GenerateTempName(int start)
{
    return start.ToString("D10", System.Globalization.CultureInfo.InvariantCulture) + ".tmp";
}
/// <summary>Paths of the downloaded part files that still have to be merged.</summary>
public static List<string> Files = new List<string>();

/// <summary>
/// Sorts the part files by name, concatenates them into "output.jpg", then deletes
/// the parts and empties <see cref="Files"/>.
/// </summary>
private static void mergeClean()
{
    // Ordinal comparison orders the digit-only names the same way in every culture.
    Files.Sort(StringComparer.Ordinal);
    using (var output = File.Create("output.jpg"))
    {
        foreach (var file in Files)
        {
            using (var input = File.OpenRead(file))
            {
                input.CopyTo(output);
            }
        }
    }
    foreach (var file in Files)
    {
        File.Delete(file);
    }
    // Forget the deleted parts; otherwise a later merge would try to open them again.
    Files.Clear();
}
You need to recombine the file from its pieces in the correct order. The current code creates random file names, and even though the names are added to the list of files, they are added in an unpredictable order because there is no telling when each segment download will finish.
Possible fix: use the block's start offset as part of the file name, e.g. String name = String.Format("file{0:D6}.tmp", Start), and sort the files by name before combining them again.
Note that the {0:D6} format pads the offset with leading zeros, so a plain sort by name yields the right order without natural-sort code; choose a width that covers the largest offset (six digits only suffice for offsets below 1,000,000).

Silverlight becomes unresponsive when downloading a file

I'm trying to use the following snippet in order download files via the SaveFileDialog in Silverlight:
/// <summary>
/// Lets the user pick a destination through a SaveFileDialog and copies the given
/// file there in 1 MB blocks.
/// </summary>
/// <remarks>
/// Nothing happens when the source does not exist, is empty, or the dialog is
/// cancelled. The copy runs synchronously on the calling thread. Copy failures do
/// not propagate; they are written to the debug output.
/// </remarks>
/// <param name="fileName">Path of the file to copy.</param>
public void SaveMediaLocal(string fileName)
{
    FileInfo fInfo = new FileInfo(fileName);
    if (!fInfo.Exists || fInfo.Length <= 0)
    {
        //no results
        return;
    }

    string extension = fInfo.Extension;
    // FileInfo.Extension includes the leading dot; the filter pattern adds its own.
    string bareExtension = extension.TrimStart('.');
    string filter = bareExtension.Length == 0
        ? "All files (*.*)|*.*"
        : String.Format("{0} files (*.{0})|*.{0}|All files (*.*)|*.*", bareExtension);

    SaveFileDialog dialog = new SaveFileDialog()
    {
        DefaultExt = extension,
        Filter = filter,
        FilterIndex = 1,
        DefaultFileName = fInfo.Name
    };
    if (dialog.ShowDialog() != true)
    {
        return;
    }

    try
    {
        byte[] buffer = new byte[1024 * 1024]; // 1 MB copy buffer
        using (Stream dest = dialog.OpenFile())
        using (FileStream source = new FileStream(fInfo.FullName, FileMode.Open, FileAccess.Read))
        {
            int currentBlockSize;
            while ((currentBlockSize = source.Read(buffer, 0, buffer.Length)) > 0)
            {
                dest.Write(buffer, 0, currentBlockSize);
            }
        }
    }
    catch (Exception ex)
    {
        // Still best-effort, but leave a trace instead of discarding the failure.
        System.Diagnostics.Debug.WriteLine(ex);
    }
}
When I use this snippet; Silverlight freezes until the download completes.
When I use this snippet instead, the UI is responsive, but doesn't work on bigger files.
// Alternative copy: opens the stream for the file chosen in the dialog and writes
// the source to it in a single call.
using (Stream stream = dialog.OpenFile())
{
// The complete file is loaded into one byte array before anything is written.
Byte[] bytes = File.ReadAllBytes(fileName);
stream.Write(bytes, 0, bytes.Length);
}
Is there something that I'm missing here?
Don't do the operation on the GUI thread. That is why it gets unresponsive. Either create a new thread or async process and do the operation in the background.

tcpclient reads fewer bytes than expected

I have a problem with tcpclient, I need to send one or more files, so I have an application with server and client, the protocol is this :
1) I send some strings with information about number of files, files name and their sizes, all this with streamwriter.writeline (received from server with the function
streamreader.readline)
2) After these strings I send the files, after each file the server answers to the client with a streamwriter.writeline of "ACK". The file is sent with the
networkstream.write method, and received with networkstream.read.
The problem is that the server keeps reading until the number of received bytes equals the file size, but although the client appears to send every byte of the file, the server receives fewer bytes in total. The application therefore blocks at this step: the server waits for the remaining bytes, while the client waits in streamreader.readline for the "ACK" string before it sends the next file or finishes the operation.
To check what the server receives, I print the number of bytes received in each iteration of the read loop, and I found that the server sometimes reads fewer bytes than the buffer size (fixed at 1024). That should be normal, because a TCP read returns as soon as some data is available, so it should not be the real problem, right? I can't believe that TCP loses bytes, but I don't know how to resolve this.
Here you can find some part of codes :
----SERVER SIDE----------
..........Doing Stuffs.............
// Server side: accepts one client, reads the text header that announces the files,
// then receives each file and acknowledges it with one text line.
//secServer is the TCPListener socket.
this.secSHandler = secServer.AcceptTcpClient();
this.secSHandler.ReceiveBufferSize = 1024;
this.secSHandler.SendBufferSize = 1024;
this.is_connected_now = true;
print("is connected!!! ");
//Taking streams...
// The reader and the writer both wrap the same network stream, which is also
// passed as-is to receive_single_file_1 further down.
// NOTE(review): a StreamReader reads ahead into its own buffer, so file bytes that
// arrive right after the header lines may already sit in sr and never reach the raw
// stream reads - confirm, this would explain the missing bytes.
this.stream = this.secSHandler.GetStream();
this.sr = new StreamReader(stream);
this.sw = new StreamWriter(stream);
// First line of the header; the substring checks below read it as
// "CLIENT" + separator + "FILE" + separator + count + one trailing character.
string first = sr.ReadLine();
print("I read + " + first + " .");
int numFiles = 0;
string connType = first.Substring(0, 6);
if (connType.CompareTo("CLIENT") == 0)
{
//SINCR CLIENT -> SERVER
string clipType = first.Substring(7, 4);
if (clipType.CompareTo("FILE") == 0)
{
//CASE RECEIVE FILE
// posSeparator is computed but not used in this fragment.
int posSeparator = first.IndexOf('*');
// Everything from index 12 up to, but excluding, the last character is the file count.
string nFiles = first.Substring(12, first.Length - 13);
numFiles = Convert.ToInt16(nFiles);
string[] fileNames = new string[numFiles];
int[] fileSizes = new int[numFiles];
//TAKE FROM THE CLIENT ALL FILE NAMES AND SIZES
// Two lines per file: the name first, then the size in bytes.
for (int i = 0; i < numFiles; i++)
{
fileNames[i] = sr.ReadLine();
print("Name file : I read " + fileNames[i]);
string dim = sr.ReadLine();
fileSizes[i] = Convert.ToInt32(dim);
print("Size file : I read " + fileSizes[i]);
}
//RECEIVING FILES
// After each file one text line is written and flushed as the acknowledgement.
for (int i = 0; i < numFiles; i++)
{
receive_single_file_1(stream, fileSizes[i], fileNames[i]); //CANNOT GO AFTER THIS POINT
sw.WriteLine("File sent - number " + i);
sw.Flush();
}
}
}
.............Doing others stuffs.............
sr.Close();
sw.Close();
THE FUNCTION RECEIVE SINGLE FILE IS HERE BELOW
/// <summary>
/// Reads exactly <paramref name="size"/> bytes from the network stream and stores
/// them in a file named <paramref name="filename"/> under <c>tempFold</c>.
/// </summary>
/// <remarks>
/// Each read asks for no more than the bytes still missing, so data that follows
/// the file on the same connection is left in the stream. An existing target file
/// is replaced.
/// NOTE(review): <paramref name="filename"/> comes from the remote peer and is
/// appended to <c>tempFold</c> unchanged - confirm that the elided naming step
/// rejects directory components.
/// NOTE(review): if a buffering reader such as a StreamReader wrapped this stream
/// earlier, it may already hold the first bytes of the file - verify at the caller.
/// </remarks>
/// <param name="netstream">Connected stream positioned at the first byte of the file.</param>
/// <param name="size">Number of bytes that make up the file.</param>
/// <param name="filename">Name of the file to create.</param>
/// <returns>
/// <c>true</c> when all <paramref name="size"/> bytes were received and written;
/// <c>false</c> when the connection ended early or an error occurred.
/// </returns>
public bool receive_single_file_1(NetworkStream netstream, int size, string filename)
{
    //...f_name is the result of another operation, for the sake of the example i write only the final instruction
    string f_name = filename;
    byte[] RecData = new byte[1024];
    int totalrecbytes = 0;
    try
    {
        // FileMode.Create truncates an existing file, so no stale bytes remain behind
        // the new content; using closes the file even when a read or write fails.
        using (FileStream Fs = new FileStream((tempFold + f_name), FileMode.Create, FileAccess.Write))
        {
            //COUNTER OF THE WHILE
            int nciclo = 0;
            // Checking the total first also covers size == 0 without a blocking read.
            while (totalrecbytes < size)
            {
                int wanted = Math.Min(RecData.Length, size - totalrecbytes);
                int RecBytes = netstream.Read(RecData, 0, wanted);
                if (RecBytes <= 0)
                {
                    // The peer closed the connection before the whole file arrived.
                    break;
                }
                totalrecbytes += RecBytes;
                //I defined print in another context...
                if (RecBytes != 1024)
                    print("Cycle : " + nciclo + " Content RecBytes : " + RecBytes + " e RecData.Length : " + RecData.Length + " byte reads : " + totalrecbytes + ".");
                Fs.Write(RecData, 0, RecBytes);
                nciclo++;
            }
        }
        if (totalrecbytes >= size)
        {
            print("Read all bytes " + totalrecbytes + " over " + size + " .");
        }
        print("End of transfer. Received " + totalrecbytes + "File :" + filename + " Saved on " + tempFold);
    }
    catch (Exception)
    {
        //Ok here i return false, but i do some other stuff before
        return false;
    }
    return totalrecbytes >= size;
}
----END OF SERVER SIDE--------------
--------CLIENT SIDE--------------
.....DOING STUFFS....
// Client side: announces the files currently on the clipboard, then sends them one
// by one and reads one reply line after each file.
//sw is the streamWriter, Sr the streamReader and the stream is the networkstream
System.Collections.Specialized.StringCollection formats = Clipboard.GetFileDropList();
// Header line: "CLIENT:FILE:" followed by the number of files and a closing '*'.
sw.WriteLine("CLIENT:FILE:" + formats.Count + "*");
sw.Flush();
//Sending to the server filename and relative size
foreach (string filename in formats)
{
//Ok the * has sense in my protocol...ignore it.
// Directory entries get a '*' appended to their name; plain files are sent as-is.
sw.WriteLine((Directory.Exists(filename)) ? System.IO.Path.GetFileName(filename) + "*" : System.IO.Path.GetFileName(filename));
sw.Flush();
// The file is opened here only to read its length, which is sent as its own line.
FileStream Fs = new FileStream(filename, FileMode.Open, FileAccess.Read);
sw.WriteLine(Fs.Length);
sw.Flush();
stream.Flush();
Fs.Close();
}
//Sending files
foreach (string filename in formats)
{
//client_sync is the class that wrap the TcpClient socket
client_sync.send_single_file(stream, filename);
// Blocks until the server has answered with one line for this file.
resp = sr.ReadLine();
}
....DOING STUFF AND end of this function...
The send file function is defined in this way :
(note : i take this function from code project some weeks ago)
/// <summary>
/// Streams the content of a file to the network stream in blocks of
/// <c>BufferSize</c> bytes.
/// </summary>
/// <remarks>
/// Only the bytes each read actually returned are sent, so a short read never puts
/// padding on the wire, and the length is never narrowed to an int. On any failure
/// the network stream is closed and <c>close_connection</c> is called; the
/// exception does not propagate.
/// </remarks>
/// <param name="netstream">Connected stream to write the file to.</param>
/// <param name="filename">Path of the file to send.</param>
public void send_single_file(NetworkStream netstream, string filename)
{
    //connected is a param of my class
    if (!connected) return;
    try
    {
        // using closes the file even when a write to the network fails.
        using (FileStream Fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
        {
            //NOTE: BUFFERSIZE IS 1024
            byte[] SendingBuffer = new byte[this.BufferSize];
            int bytesRead;
            while ((bytesRead = Fs.Read(SendingBuffer, 0, SendingBuffer.Length)) > 0)
            {
                netstream.Write(SendingBuffer, 0, bytesRead);
            }
        }
        // Flush inside the try block: after a failure the stream is already closed
        // and flushing it would throw again.
        netstream.Flush();
    }
    catch (Exception)
    {
        netstream.Close();
        //my function
        close_connection();
    }
}
---------END OF CLIENT SIDE------
So...someone can help me to escape from this hell??? THX :)

Categories

Resources