I have this function that splits a serialized byte array into pieces based on a max length:
public static List<byte[]> ZipStringList(List<string> curSpawn, int maxSize = -1) {
Environment.SetEnvironmentVariable("MONO_REFLECTION_SERIALIZER", "yes");
MemoryStream o = new MemoryStream();
BinaryFormatter bf = new BinaryFormatter();
bf.Serialize(o, curSpawn);
byte[] buffer = o.GetBuffer();
o.Close();
MemoryStream memoryStream = new MemoryStream();
using (GZipStream gZipStream = new GZipStream(memoryStream, CompressionMode.Compress, true))
{
gZipStream.Write(buffer, 0, buffer.Length);
}
memoryStream.Position = 0;
if (maxSize > 0 && memoryStream.Length > maxSize) {
int pieces = Mathf.CeilToInt (memoryStream.Length / (float)maxSize);
List<byte[]> listByte = new List<byte[]> ();
for (int i = 0; i < pieces; ++i) {
int curLength = (int)Mathf.Min (maxSize, memoryStream.Length - (i * maxSize));
byte[] compressedData = new byte[curLength];
memoryStream.Read (compressedData, 0, compressedData.Length);
if (i == pieces - 1) {
byte[] gZipBuffer = new byte[compressedData.Length + 4];
Buffer.BlockCopy (compressedData, 0, gZipBuffer, 4, compressedData.Length);
Buffer.BlockCopy (BitConverter.GetBytes (buffer.Length), 0, gZipBuffer, 0, 4);
listByte.Add (gZipBuffer);
}//if last piece
else {
listByte.Add (compressedData);
}//else normal
}//for each piece
return listByte;
} else {
byte[] compressedData = new byte[memoryStream.Length];
memoryStream.Read (compressedData, 0, compressedData.Length);
byte[] gZipBuffer = new byte[compressedData.Length + 4];
Buffer.BlockCopy (compressedData, 0, gZipBuffer, 4, compressedData.Length);
Buffer.BlockCopy (BitConverter.GetBytes (buffer.Length), 0, gZipBuffer, 0, 4);
List<byte[]> retList = new List<byte[]> ();
retList.Add (gZipBuffer);
return retList;
}
}//ZipStringList()
Then this function that deserializes it:
public static List<string> UnzipStringList(List<byte[]> byteList) {
byte[] gZipBuffer = byteList.SelectMany(b => b).ToArray();
Environment.SetEnvironmentVariable("MONO_REFLECTION_SERIALIZER", "yes");
byte[] buffer;
using (MemoryStream memoryStream = new MemoryStream())
{
int dataLength = BitConverter.ToInt32(gZipBuffer, 0);
memoryStream.Write(gZipBuffer, 4, gZipBuffer.Length - 4);
buffer = new byte[dataLength];
memoryStream.Position = 0;
using (GZipStream gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
{
gZipStream.Read(buffer, 0, buffer.Length);
}
}
MemoryStream o = new MemoryStream(buffer);
BinaryFormatter bf = new BinaryFormatter();
return (List<string>)bf.Deserialize(o);
}//UnzipStringList()
When I run it though, I'm getting a "corrupted gzip header" error, so there's some simple thing I'm missing. What is it?
More details: there aren't really, I'm likely positioning the buffer wrong. Fun fact: I searched google for the error message and the only result was this question.
Here's some code you can test with. Make it a large enough file (like at least a kilobyte). My file is full of JSON arrays.
string[] allLines = File.ReadAllLines ("path_to_file.txt");
List<byte[]> byteArray = Utility.ZipStringList(new List<string>(allLines), 60000);
List<string> stringList = Utility.UnzipStringList (byteArray);
Utility.LogRealError ("I unzipped it?", stringList.Count, stringList [4]);
Related
When I am trying to deflate one string to XML getting the error. can anyone help me out, if the string is wrong or any other thing I need to consider?
string output = DecompressString("H4sIAAAAAAAAAO1ay3LbIBTd9ys83hOcZrrxKP6CZpUuusUStulIoAFkxfn6XpCQ0NvWTDt+ZBUJ7vtwEQcneHv7ReN3Ko8spGrxkcRcrT8Ue10etE7XGOd5/pS/PAm5x99Xq2f8++3ne3igCUGMK014SJeFVqWRSfZEtWJWh+85TokkicIghRVLFPiCl5Cky823xSIIRZLGVFMkJNszTjTje8QiyjXTJ5RKquARRgU34qAwKbcgoWZH+rrUMqNL7NRSKokWctrPgmT6AFKfNCptFCbACEhpyUIjhcSRSgkGNu4BcaFR4TvAvZLOTEjimEoUMZXG5IQ4SaiLYGMcBnhMosgHn5+QrTO+rNATuHjpnQuLr1NBtCOxmoNR05iHV2GwqnQioOxgNSEcVANs33vQ3EC9wQE7EsiY8Z0ADTPeAPLyyrfrNIVCW7yShock4ywsrEZQO6nqiAZmm41Qp53FEKF2bZtJvmZU79ZFn65heA2bgLEJxlIRs/BUKZfqCwalNmsUQaVyIiOTRcZDwSNmYiAx2p7QEfISnqqNtJRQ/rABikaMzN9JrCuA11hpWTbxoojaWgC60WwnuJEJ7k8lIBY95QnDGoIQZlWQZBH7TxW0rm6+gttMne596YFhk+YNoHDPy/f6UeACvidp/Aj9AKkSrnIqbwSOO2+MW4ADzjYZOLSHdgk1OJAtVOMRWsXL9xbxufveuR58ijfgCo75jNOQihcN8pFhotZUkcB5eMGdOdCxoyjGN2FMSdt8QwKJXe2vHe+Q1ZoPuvn8QMu4LT+T9A8NAWREdDkKz1rYBbqB2oJQRs0AAdKuPUUjYCjkHLsuKi00kAGeJVvoBj89ZdoFmaXSrMjmeRXgy7UqJEAUqhO5qmmWQDQvYLN/ph/BZidX375CZdVGZlTaD4zD2XK3g5RqQ93RNk0fWbUeIR+TGhDKCTM49rLwcm7qpmNAafAGymyTbmVBlUIKxpXZJNnOGTkwhexuCn9dhNZ7ca00Q3u8oJ7UYDl9mQGRg4ij8eoYiW5p8KhjZ3XI65ZIWafIgIAnpkC9Qv/4PqX0YrrV1r+KBd7UvZ9Mpr5RrSCgJCKfn17ZDtZKM4qOu+ETyyRe93xSuRG8OieY8Q53+0+m9+JKtoE6lq9t4GqW1UV4fW0DV4UX45pKTh7jZwp7pvHy/UL9EX5aeWjUO5/88a95m3JMUYBe+uEbmDo8XMZfvChrCpNQpcieOlblcfJ2MMOS04GMe/EJFQfuTXlIu/TNzYww2oR8eIIIVoRmIUsJ1/YiY2S6S0v9QBolbUxU43Y7Hom/PT8vix8XJdEXk5dK73SdaKY0rN5Pc6UUl5dMWvAuMANyPTd0neU54sILpLxoODugCflzApt0GeDGfzttvv0F8ylWAP4kAAA=");
public static string DecompressString(string compressedString)
{
byte[] decompressedBytes;
var compressedStream = new MemoryStream(Convert.FromBase64String(compressedString));
using (var decompressorStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
{
using (var decompressedStream = new MemoryStream())
{
decompressorStream.CopyTo(decompressedStream);
decompressedBytes = decompressedStream.ToArray();
}
}
return Encoding.UTF8.GetString(decompressedBytes);
}
public static string DecodeGzip(string str)
{
byte[] gzBuffer = Convert.FromBase64String(str);
using (MemoryStream ms = new MemoryStream())
{
int msgLength = BitConverter.ToInt32(gzBuffer, 0);
ms.Write(gzBuffer, 0, gzBuffer.Length);
byte[] buffer = new byte[msgLength];
ms.Position = 0;
int length;
using (GZipStream zip = new GZipStream(ms, CompressionMode.Decompress))
{
length = zip.Read(buffer, 0, buffer.Length);
}
var data = new byte[length];
Array.Copy(buffer, data, length);
return Encoding.UTF8.GetString(data);
}
}
I was using GZipStream for some time without any problems. I didn't change any code at my function but now the compress and decompress doesn't work. I have extracted the following simple example (maybe I do something wrong generally...).
public static class ZipLib
{
#region Public Methods
public static string Unzip(byte[] compressedText)
{
try {
byte[] gzBuffer = compressedText;
int msgLength = BitConverter.ToInt32(gzBuffer, 0);
byte[] buffer = new byte[msgLength];
using (MemoryStream ms = new MemoryStream()) {
ms.Write(gzBuffer, 4, gzBuffer.Length - 4);
using (GZipStream zip = new GZipStream(ms, CompressionMode.Decompress, true)) {
zip.Read(buffer, 0, buffer.Length);
}
}
return Encoding.UTF8.GetString(buffer);
}
catch (Exception ex) {
Debug.WriteLine(ex.Message);
}
return "";
}
public static byte[] Zip(string text)
{
byte[] buffer = Encoding.UTF8.GetBytes(text);
MemoryStream ms = new MemoryStream();
using (GZipStream zip = new GZipStream(ms, CompressionMode.Compress, true)) {
zip.Write(buffer, 0, buffer.Length);
}
ms.Position = 0;
byte[] compressed = new byte[ms.Length];
ms.Read(compressed, 0, compressed.Length);
byte[] gzBuffer = new byte[compressed.Length + 4];
Buffer.BlockCopy(compressed, 0, gzBuffer, 4, compressed.Length);
Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gzBuffer, 0, 4);
return gzBuffer;
}
#endregion
}
internal class Program
{
#region Private Methods
private static void Main(string[] args)
{
string uncompressedIn = "Hallo Welt";
var compressed = ZipLib.Zip(uncompressedIn);
var uncompressed = ZipLib.Unzip(compressed);
Console.WriteLine("Uncompressed Input: " + uncompressedIn);
Console.WriteLine("Compressed value: " + string.Join(" | ", compressed));
Console.WriteLine("Uncompressed from compressed: " + string.Join(" | ", uncompressed.ToCharArray()));
Console.ReadKey();
}
#endregion
}
The value of uncompressed is "\0\0\0\0"...
In Unzip, you Write to a MemoryStream, then give it to the GZipStream to read from, but: it is still at the end of the stream, you didn't rewind it. Try adding:
ms.Position = 0;
after the Write. Note, however, that it might be more efficient to do:
using(MemoryStream ms = new MemoryStream(gzBuffer, 4, gzBuffer.Length - 4))
{
//...
}
this avoids duplicating any memory
I have several .gz files, and I want to decompress them one by one.
I have writen a simple code using GzipStream in C#, but got failed. I wonder a correct and useful method to achieve what I want. Thanks a lot.
private string Extrgz(string infile)
{
string dir = Path.GetDirectoryName(infile);
string decompressionFileName = dir + Path.GetFileNameWithoutExtension(infile) + "_decompression.bin";
using (GZipStream instream = new GZipStream(File.OpenRead(infile), CompressionMode.Compress))// ArgumentException...
{
using (FileStream outputStream = new FileStream(decompressionFileName, FileMode.Append, FileAccess.Write))
{
int bufferSize = 8192, bytesRead = 0;
byte[] buffer = new byte[bufferSize];
while ((bytesRead = instream.Read(buffer, 0, bufferSize)) > 0)
{
outputStream.Write(buffer, 0, bytesRead);
}
}
}
return decompressionFileName;
}
You need to decompress but you set CompressionMode.Compress, replace it with CompressionMode.Decompress.
Example here.
Here:
public static void DeCompressFile(string CompressedFile, string DeCompressedFile)
{
byte[] buffer = new byte[1024 * 1024];
using (System.IO.FileStream fstrmCompressedFile = System.IO.File.OpenRead(CompressedFile)) // fi.OpenRead())
{
using (System.IO.FileStream fstrmDecompressedFile = System.IO.File.Create(DeCompressedFile))
{
using (System.IO.Compression.GZipStream strmUncompress = new System.IO.Compression.GZipStream(fstrmCompressedFile,
System.IO.Compression.CompressionMode.Decompress))
{
int numRead = strmUncompress.Read(buffer, 0, buffer.Length);
while (numRead != 0)
{
fstrmDecompressedFile.Write(buffer, 0, numRead);
fstrmDecompressedFile.Flush();
numRead = strmUncompress.Read(buffer, 0, buffer.Length);
} // Whend
//int numRead = 0;
//while ((numRead = strmUncompress.Read(buffer, 0, buffer.Length)) != 0)
//{
// fstrmDecompressedFile.Write(buffer, 0, numRead);
// fstrmDecompressedFile.Flush();
//} // Whend
strmUncompress.Close();
} // End Using System.IO.Compression.GZipStream strmUncompress
fstrmDecompressedFile.Flush();
fstrmDecompressedFile.Close();
} // End Using System.IO.FileStream fstrmCompressedFile
fstrmCompressedFile.Close();
} // End Using System.IO.FileStream fstrmCompressedFile
} // End Sub DeCompressFile
// http://www.dotnetperls.com/decompress
public static byte[] Decompress(byte[] gzip)
{
byte[] baRetVal = null;
using (System.IO.MemoryStream ByteStream = new System.IO.MemoryStream(gzip))
{
// Create a GZIP stream with decompression mode.
// ... Then create a buffer and write into while reading from the GZIP stream.
using (System.IO.Compression.GZipStream stream = new System.IO.Compression.GZipStream(ByteStream
, System.IO.Compression.CompressionMode.Decompress))
{
const int size = 4096;
byte[] buffer = new byte[size];
using (System.IO.MemoryStream memory = new System.IO.MemoryStream())
{
int count = 0;
count = stream.Read(buffer, 0, size);
while (count > 0)
{
memory.Write(buffer, 0, count);
memory.Flush();
count = stream.Read(buffer, 0, size);
}
baRetVal = memory.ToArray();
memory.Close();
}
stream.Close();
} // End Using System.IO.Compression.GZipStream stream
ByteStream.Close();
} // End Using System.IO.MemoryStream ByteStream
return baRetVal;
} // End Sub Decompress
I am writing a C# assembly to take advantage of the GZip functionality in newer versions of the .NET framework, and I am copying code I have used successfully elsewhere.
The way this works is that an application we are using feeds a stream to the assembly, the assembly reads it, compresses it, then returns the compressed result as a string. That string is then put back into the assembly through a stream to uncompress it (normally it will be stored, but I am simply running a basic test).
However, when I feed the string back to the assembly, it errors out while reading it from a byte array that is read from the application's stream. Here is the code:
private void ReadStream(IStream stream, out byte[] data)
{
using (MemoryStream writer = new MemoryStream())
{
IntPtr rwBytes = Marshal.AllocHGlobal(4);
int _rwBytes = 0;
data = new byte[0xafc8];
do
{
stream.Read(data, 0xafc8, rwBytes);
_rwBytes = Marshal.ReadInt32(rwBytes);
writer.Write(data, 0, _rwBytes);
}
while (_rwBytes > 0);
writer.Close();
Marshal.FreeHGlobal(rwBytes);
}
}
public string CompressString([In, MarshalAs(UnmanagedType.AsAny)] object appStream)
{
byte[] buffer = null;
ReadStream(appStream as IStream, out buffer);
MemoryStream ms = new MemoryStream();
using (GZipStream zip = new GZipStream(ms, CompressionMode.Compress, true))
{
zip.Write(buffer, 0, buffer.Length);
}
ms.Position = 0;
MemoryStream outStream = new MemoryStream();
byte[] compressed = new byte[ms.Length];
ms.Read(compressed, 0, compressed.Length);
byte[] gzBuffer = new byte[compressed.Length + 4];
System.Buffer.BlockCopy(compressed, 0, gzBuffer, 4, compressed.Length);
System.Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gzBuffer, 0, 4);
string str = Convert.ToBase64String(gzBuffer);
gzBuffer = Convert.FromBase64String(str);
return str;
}
public string DecompressString([In, MarshalAs(UnmanagedType.AsAny)] object appStream)
{
byte[] buffer = null;
ReadStream(appStream as IStream, out buffer);
string compressedText = Encoding.UTF8.GetString(buffer);
byte[] gzBuffer = Convert.FromBase64String(compressedText);
using (MemoryStream ms = new MemoryStream())
{
int msgLength = BitConverter.ToInt32(gzBuffer, 0);
ms.Write(gzBuffer, 4, gzBuffer.Length - 4);
buffer = new byte[msgLength];
ms.Position = 0;
using (GZipStream zip = new GZipStream(ms, CompressionMode.Decompress))
{
zip.Read(buffer, 0, buffer.Length);
}
}
return Encoding.UTF8.GetString(buffer);
}
The string is returned from the CompressString function as:
yK8AAB+LCAAAAAAABADtvQdgHEmWJSYvbcp7f0r1StfgdKEIgGATJNiQQBDswYjN5pLsHWlHIymrKoHKZVZlXWYWQMztnbz33nvvvffee++997o7nU4n99//P1xmZAFs9s5K2smeIYCqyB8/fnwfPyK+ndd5WjRplk6rxarOm6aolmmbN+04TZ/n7cdN2uTU4jwt2vSqqt8241/jR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89P3p+9Pzo+dHz/67nR9j86PnR86PnR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89/99/fjSCHz0/en70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj5//7z49G8KPnR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89P3p+9Px///nRCH70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj50fPj57/7z8/GsGPnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fP/nef/AYk/UbjIrwAA
When it is read back from the stream in the Encoding.UTF8.GetString call, it is:
yK8AAB+LCAAAAAAABADtvQdgHEmWJSYvbcp7f0r1StfgdKEIgGATJNiQQBDswYjN5pLsHWlHIymrKoHKZVZlXWYWQMztnbz33nvvvffee++997o7nU4n99//P1xmZAFs9s5K2smeIYCqyB8/fnwfPyK+ndd5WjRplk6rxarOm6aolmmbN+04TZ/n7cdN2uTU4jwt2vSqqt8241/jR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89P3p+9Pzo+dHz/67nR9j86PnR86PnR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89/99/fjSCHz0/en70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj5//7z49G8KPnR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89P3p+9Px///nRCH70/Oj50fOj50fPj54fPT96fvT86PnR86PnR8+Pnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fOj50fPj57/7z8/GsGPnh89P3p+9Pzo+dHzo+dHz4+eHz0/en70/Oj50fP/nef/AYk/UbjIrwAA
These strings are identical. I can even convert it back and forth in the CompressString function with no problem. Any thoughts out there? This is very strange.
Don't use UTF8, you should use the ascii text encoding instead. When you get the bytes from your string, you'll end up with some extra bytes due to the way UTF strings are encoded.
I am trying to compress data using the zlib .net library. Regardless of the content of the uncompressed string I only seem to get two bytes of data in the raw[].
{
string uncompressed = "1234567890";
byte[] data = UTF8Encoding.Default.GetBytes(uncompressed);
MemoryStream input = new MemoryStream(data);
MemoryStream output = new MemoryStream();
Stream outZStream = new ZOutputStream(output,zlibConst.Z_DEFAULT_COMPRESSION);
CopyStream(input, outZStream);
output.Seek(0, SeekOrigin.Begin);
byte[] raw = output.ToArray();
string compressed = Convert.ToBase64String(raw);
}
public void CopyStream(System.IO.Stream input, System.IO.Stream output)
{
byte[] buffer = new byte[2000];
int len;
while ((len = input.Read(buffer, 0, 2000)) > 0)
{
output.Write(buffer, 0, len);
}
output.Flush();
}
The problem here is that the ZOutputStream actually writes some of the information into the stream in the finish() method (which is called by Close). The Close method also closes the base stream, so that is not much use in this situation.
Changing the code to the following should work:
{
string uncompressed = "1234567890";
byte[] data = UTF8Encoding.Default.GetBytes(uncompressed);
MemoryStream input = new MemoryStream(data);
MemoryStream output = new MemoryStream();
ZOutputStream outZStream = new ZOutputStream(output,zlibConst.Z_DEFAULT_COMPRESSION);
CopyStream(input, outZStream);
outZStream.finish();
output.Seek(0, SeekOrigin.Begin);
byte[] raw = output.ToArray();
string compressed = Convert.ToBase64String(raw);
}
public void CopyStream(System.IO.Stream input, System.IO.Stream output)
{
byte[] buffer = new byte[2000];
int len;
while ((len = input.Read(buffer, 0, 2000)) > 0)
{
output.Write(buffer, 0, len);
}
output.Flush();
}