compression and utf8 encoding - c#

can someone tell me why I'm loosing information doing this process ? Some utf8 chars appears not decoded :
"Biography":"\u003clink type=... or Steve Blunt \u0026 Marty Kelley
but others do : "Name":"朱敬
// Creating a 64bit string containing gzip data
string bar;
using (MemoryStream ms = new MemoryStream())
{
using (GZipStream gzip = new GZipStream(ms, CompressionMode.Compress))
using (StreamWriter writer = new StreamWriter(gzip, System.Text.Encoding.UTF8))
{
writer.Write(s);
}
ms.Flush();
bar = Convert.ToBase64String(ms.ToArray());
}
// Reading it
string foo;
byte[] itemData = Convert.FromBase64String(bar);
using (MemoryStream src = new MemoryStream(itemData))
using (GZipStream gzs = new GZipStream(src, CompressionMode.Decompress))
using (MemoryStream dest = new MemoryStream(itemData.Length*2))
{
gzs.CopyTo(dest);
foo = Encoding.UTF8.GetString(dest.ToArray());
}
Console.WriteLine(foo);

It could be because you are writing the string using StreamWriter but reading it using CopyTo() and Encoding.GetString().
What happens if you try this?
// Reading it
string foo;
byte[] itemData = Convert.FromBase64String(bar);
using (MemoryStream src = new MemoryStream(itemData))
using (GZipStream gzs = new GZipStream(src, CompressionMode.Decompress))
using (StreamReader reader = new StreamReader(gzs, Encoding.UTF8))
{
foo = reader.ReadLine();
}
Although I think you should be using BinaryReader and BinaryWriter:
string s = "Biography:\u003clink type...";
string bar;
using (MemoryStream ms = new MemoryStream())
{
using (GZipStream gzip = new GZipStream(ms, CompressionMode.Compress))
using (var writer = new BinaryWriter(gzip, Encoding.UTF8))
{
writer.Write(s);
}
ms.Flush();
bar = Convert.ToBase64String(ms.ToArray());
}
// Reading it
string foo;
byte[] itemData = Convert.FromBase64String(bar);
using (MemoryStream src = new MemoryStream(itemData))
using (GZipStream gzs = new GZipStream(src, CompressionMode.Decompress))
using (var reader = new BinaryReader(gzs, Encoding.UTF8))
{
foo = reader.ReadString();
}
Console.WriteLine(foo);

The issue was simply that the characters were already encoded in the source string.
Ps : Credit goes to rik for this answer :)
Edit : I also had the StreamReader issue matthew-watson was suggesting.

Related

How to convert from a gzip compressed file to Zstandard

How do I convert from a gzip compressed file to Zstandard? Here is my attempt:
using Zstandard.Net;
public static void ConvertToZstd(string path)
{
byte[] compressed = null;
using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read))
using (var gzipStream = new GZipStream(stream, CompressionMode.Decompress, false))
using (var memoryStream = new MemoryStream())
using (var compressionStream = new ZstandardStream(memoryStream, CompressionMode.Compress))
{
compressionStream.CompressionLevel = 11; // optional!!
//compressionStream.CompressionDictionary = dictionary; // optional!!
compressionStream.Write(gzipStream, 0, gzipStream.Length);
compressionStream.Close();
compressed = memoryStream.ToArray();
}
}
public static void ConvertToZstd(string path, string outputPath)
{
using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read))
using (var gzipStream = new GZipStream(stream, CompressionMode.Decompress, false))
using (var memoryStream = new MemoryStream())
using (var compressionStream = new ZstandardStream(memoryStream, CompressionMode.Compress))
{
compressionStream.CompressionLevel = 11; // optional!!
//compressionStream.CompressionDictionary = dictionary; // optional!!
gzipStream.CopyTo(compressionStream);
compressionStream.Close();
File.WriteAllBytes(outputPath, memoryStream.ToArray());
}
}

Download byte array from stream writer

Hi I am trying to make working this piece of code, after the copy of the word file template into a memory stream, read it and replace some text, I would convert the stream writer to byte array which will be used to download the result. Thanks in advance
public byte[] GetWordFile()
{
try
{
string sourceFile = Path.Combine("C:/[...]/somefile.docx");
using (MemoryStream inStream = new MemoryStream())
{
using (Stream fs = File.Open(sourceFile, FileMode.Open, FileAccess.Read))
{
fs.CopyTo(inStream);
}
using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(inStream, true))
{
string docText = null;
using (StreamReader sr = new StreamReader(wordDoc.MainDocumentPart.GetStream()))
{
docText = sr.ReadToEnd();
}
docText = docText.Replace("numpol", "HAHAHHAHA");
using (MemoryStream outStream = new MemoryStream())
using (StreamWriter sw = new StreamWriter(wordDoc.MainDocumentPart.GetStream(FileMode.Create)))
{
sw.Write(docText);
sw.Flush();
sw.BaseStream.CopyTo(outStream);
outStream.Position = 0;
return outStream.ToArray();
}
}
}
}
catch (Exception ex)
{
///...
}
}

How to Download, Decompress and Deserialize an object/file from Azure Blob Storage?

This code will compress and serialize the object:
public static byte[] ObjectToByteArray(object[] obj)
{
using (MemoryStream msCompressed = new MemoryStream())
using (GZipStream gZipStream = new GZipStream(msCompressed, CompressionMode.Compress))
using (MemoryStream msDecompressed = new MemoryStream())
{
new BinaryFormatter().Serialize(msDecompressed, obj);
byte[] byteArray = msDecompressed.ToArray();
gZipStream.Write(byteArray, 0, byteArray.Length);
gZipStream.Close();
return msCompressed.ToArray();
}
}
And the following will upload it to the Azure Blob Storage:
byte[] byteObject = ObjectToByteArray(uploadObject);
using (Stream stream = new MemoryStream(byteObject))
{
stream.Seek(0, SeekOrigin.Begin);
blockBlob.UploadFromStream(stream, null, options);
}
This works great, but I can't find a way to download, decompress and deserialize this object/file from my storage.
You could use method DownloadToStream to download the file to local.
using (var fileStream = System.IO.File.OpenWrite(#"xxxx\compressedfile.gz"))
{
blockBlob.DownloadToStream(fileStream);
}
And then you could refer to the following code to decompress and deserialize the specified stream.
public static void DecompressAndDeserialize(string path)
{
using (FileStream originalFileStream = new FileStream(path, FileMode.Open, FileAccess.Read))
{
FileInfo fileToDecompress = new FileInfo(path);
string FileName = fileToDecompress.FullName;
string newFileName = FileName.Remove(FileName.Length - fileToDecompress.Extension.Length);
using (FileStream decompressedFileStream = File.Create(newFileName))
{
using (GZipStream decompressionStream = new GZipStream(originalFileStream, CompressionMode.Decompress))
{
decompressionStream.CopyTo(decompressedFileStream);
}
}
FileStream fs = new FileStream(newFileName, FileMode.Open);
BinaryFormatter formatter = new BinaryFormatter();
object[] uploadObject = (object[])formatter.Deserialize(fs);
}
}

How to convert a compressed stream to an uncompressed stream in c# using GZipStream

Following various samples I've been able to convert a memory stream to a compressed stream and then to a byte array to save in a database but I'm having trouble going the other way. Here's what I've got so far...
...
using (MemoryStream compressedStream = new MemoryStream()) {
...some code that builds the compressedStream for an undetermined
number of byteArrays from a database
using (MemoryStream uncompressedStream = new MemoryStream()) {
// method 1
using (GZipStream unzippedStream = new GZipStream(compressedStream, CompressionMode.Decompress)) {
unzippedStream.CopyTo(uncompressedStream);
}
// method 2
using (GZipStream unzippedStream = new GZipStream(uncompressedStream, CompressionMode.Decompress)) {
compressedStream.CopyTo(unzippedStream);
}
... do something with uncompressedStream
}
}
Method 1 seams to follows the examples I see on here but causes an error "stream does not support writing"
Method 2 seams to make more sense but the uncompressed stream is always empty
P.S. Really what I would like to have is something simple like
MemoryStream compressed = GZipStream(uncompressed, Compress)
MemoryStream upcompressed = GZipStream(compressed, Decompress)
This code example works. The first part is just to get a compressed byte array. The second part demonstrates how the compressed stream can be created in code, write can be done multiple times. But the position must be set to 0.
byte[] compressed;
string output;
using (var outStream = new MemoryStream()) {
using (var tinyStream = new GZipStream(outStream, CompressionMode.Compress))
using (var mStream = new MemoryStream(Encoding.UTF8.GetBytes("This is a test"))) {
mStream.CopyTo(tinyStream);
}
compressed = outStream.ToArray();
}
using (var compressedStream = new MemoryStream()) {
// can do multiple writes here to create the compressed stream
compressedStream.Write(compressed, 0, compressed.Length);
compressedStream.Flush();
compressedStream.Position = 0;
using (var unzippedStream = new GZipStream(compressedStream, CompressionMode.Decompress))
using (var uncompressedStream = new MemoryStream()) {
unzippedStream.CopyTo(uncompressedStream);
output = Encoding.UTF8.GetString(uncompressedStream.ToArray());
}
}
Console.WriteLine(output);

Decompress byte array to string via BinaryReader yields empty string

I am trying to decompress a byte array and get it into a string using a binary reader. When the following code executes, the inStream position changes from 0 to the length of the array, but str is always an empty string.
BinaryReader br = null;
string str = String.Empty;
using (MemoryStream inStream = new MemoryStream(pByteArray))
{
GZipStream zipStream = new GZipStream(inStream, CompressionMode.Decompress);
BinaryReader br = new BinaryReader(zipStream);
str = br.ReadString();
inStream.Close();
br.Close();
}
You haven't shown how is the data being compressed, but here's a full example of compressing and decompressing a buffer:
using System;
using System.IO;
using System.IO.Compression;
using System.Text;
class Program
{
static void Main()
{
var test = "foo bar baz";
var compressed = Compress(Encoding.UTF8.GetBytes(test));
var decompressed = Decompress(compressed);
Console.WriteLine(Encoding.UTF8.GetString(decompressed));
}
static byte[] Compress(byte[] data)
{
using (var compressedStream = new MemoryStream())
using (var zipStream = new GZipStream(compressedStream, CompressionMode.Compress))
{
zipStream.Write(data, 0, data.Length);
zipStream.Close();
return compressedStream.ToArray();
}
}
static byte[] Decompress(byte[] data)
{
using (var compressedStream = new MemoryStream(data))
using (var zipStream = new GZipStream(compressedStream, CompressionMode.Decompress))
using (var resultStream = new MemoryStream())
{
zipStream.CopyTo(resultStream);
return resultStream.ToArray();
}
}
}

Categories

Resources