Closed. This question is opinion-based. It is not currently accepting answers.
Want to improve this question? Update the question so it can be answered with facts and citations by editing this post.
Closed 3 years ago.
Improve this question
I wonder what is the best way to store binary data indexed by a string key into a single file.
This would be the circumstances I would be looking for:
Data indexed by a string key with variable length (max. 255 characters, ASCII only is fine).
Binary data has variable length (500 bytes up to 10 KB).
Amount of data stored < 5,000 entries.
In production only functions "GetDataByKey" & "GetAllKeys" needed and therefore should be fast.
Adding data is not used in production and can therefore be slow.
Is there any simple C# based library that would fit to those requirements?
I was looking at some NoSQL databases, but this seems to be a bit over the top for such a very simple data structure.
As only a small percentage of the data records are used during an application run I would prefer not to just read everything into memory on application start (e. g. using serialization), but instead just read the entries from the file that are really needed during runtime.
Any ideas or tips would be much appreciated, thanks!
Use BinaryFormatter, as in the code below:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Runtime.InteropServices;
using System.Runtime.Serialization.Formatters.Binary;
using System.Xml.Serialization;
namespace ConsoleApplication1
{
/// <summary>
/// Demo entry point: generates 1000 random records, writes them to
/// <see cref="FILENAME"/> and then looks up the record named "101".
/// </summary>
class Program
{
    // Verbatim string literal (@"..."): the backslashes of the Windows path are taken literally.
    const string FILENAME = @"c:\temp\test.bin";

    static void Main(string[] args)
    {
        Read_Write readWrite = new Read_Write();
        readWrite.CreateData(1000);
        readWrite.WriteData(FILENAME);
        Data data = readWrite.GetRecord(FILENAME, "101");
    }
}
/// <summary>
/// One stored record: a key and its binary payload, both as raw bytes.
/// The field names and order are part of the serialized layout, so they
/// must not be changed.
/// </summary>
[Serializable, XmlRoot(ElementName = "ABC")]
public struct Data
{
    /// <summary>Record key as bytes.</summary>
    public byte[] name;

    /// <summary>Binary payload belonging to <see cref="name"/>.</summary>
    public byte[] data;
}
/// <summary>
/// Generates random test records, writes them to a single file with
/// <see cref="BinaryFormatter"/> and reads a record back by name.
/// </summary>
/// <remarks>
/// File layout: a 4-byte record count followed by one BinaryFormatter
/// payload per record. Lookup is a linear scan that deserializes every record
/// up to the match.
/// SECURITY: BinaryFormatter is unsafe on untrusted input and is removed in
/// .NET 9; only use this on files the application produced itself.
/// </remarks>
public class Read_Write
{
    const int MIN_SIZE = 500;
    const int MAX_SIZE = 10000;

    // One generator for all records: creating a new Random per record can
    // yield identical seeds (and therefore identical payloads) on runtimes
    // that seed from the clock.
    private readonly Random rand = new Random();

    /// <summary>Records created by <see cref="CreateData"/> and written by <see cref="WriteData"/>.</summary>
    public List<Data> data { get; set; }

    /// <summary>
    /// Replaces <see cref="data"/> with <paramref name="numberRecords"/> records
    /// named "0", "1", ... (UTF-8, NUL-terminated) with random payloads of
    /// MIN_SIZE to MAX_SIZE bytes.
    /// </summary>
    public void CreateData(int numberRecords)
    {
        data = new List<Data>();
        for (int i = 0; i < numberRecords; i++)
        {
            Data newData = new Data();
            string name = i.ToString() + '\0'; // null terminate string
            newData.name = Encoding.UTF8.GetBytes(name);
            // Upper bound of Next is exclusive, hence + 1 to allow MAX_SIZE itself.
            newData.data = new byte[rand.Next(MIN_SIZE, MAX_SIZE + 1)];
            // NextBytes covers the full 0..255 range (Next(0, 0xFF) never produced 255).
            rand.NextBytes(newData.data);
            data.Add(newData);
        }
    }

    /// <summary>Writes the record count and all records to <paramref name="filename"/>, replacing any existing file.</summary>
    /// <exception cref="InvalidOperationException">No records have been created yet.</exception>
    public void WriteData(string filename)
    {
        if (data == null)
        {
            throw new InvalidOperationException("No data to write; call CreateData first.");
        }

        // File.Create truncates; File.OpenWrite would leave stale bytes behind
        // when the new content is shorter than an existing file.
        using (Stream writer = File.Create(filename))
        {
            // write number of records
            byte[] numberOfRecords = BitConverter.GetBytes(data.Count);
            writer.Write(numberOfRecords, 0, numberOfRecords.Length);

            BinaryFormatter formatter = new BinaryFormatter();
            foreach (Data d in data)
            {
                formatter.Serialize(writer, d);
            }
        }
    }

    /// <summary>
    /// Scans <paramref name="filename"/> for the record whose name equals
    /// <paramref name="name"/>.
    /// </summary>
    /// <returns>The matching record, or a default (empty) <see cref="Data"/> when none matches.</returns>
    /// <exception cref="EndOfStreamException">The file is too short to contain the record count.</exception>
    public Data GetRecord(string filename, string name)
    {
        using (Stream reader = File.OpenRead(filename))
        {
            // Read the 4-byte count completely; a single Read may return fewer bytes.
            byte[] numberOfRecords = new byte[4];
            int filled = 0;
            while (filled < numberOfRecords.Length)
            {
                int n = reader.Read(numberOfRecords, filled, numberOfRecords.Length - filled);
                if (n == 0)
                {
                    throw new EndOfStreamException("File is too short to contain a record count.");
                }
                filled += n;
            }

            int records = BitConverter.ToInt32(numberOfRecords, 0);
            BinaryFormatter formatter = new BinaryFormatter();
            for (int i = 0; i < records; i++)
            {
                Data d = (Data)formatter.Deserialize(reader);
                if (name == GetString(d.name))
                {
                    return d; // stop at the first match instead of reading the rest of the file
                }
            }
        }

        return new Data();
    }

    /// <summary>
    /// Decodes a UTF-8 byte sequence up to (not including) the first NUL byte,
    /// or the whole array when it contains no NUL.
    /// </summary>
    public string GetString(byte[] characters)
    {
        int length = Array.IndexOf(characters, (byte)0x00);
        if (length < 0)
        {
            length = characters.Length;
        }
        return Encoding.UTF8.GetString(characters, 0, length);
    }
}
}
As there does not seem to be a solution/library available for this yet (probably because the problem is just too simple to share ;-) ), I've built a small class myself.
In case somebody else needs the same, that's the way I store this string key based binary data now:
/// <summary>
/// Read-mostly store of binary blobs indexed by string key, kept in a single file.
/// </summary>
/// <remarks>
/// File layout: an Int32 entry count, then per entry the key (BinaryWriter
/// string: 7-bit-encoded byte length + UTF-8 bytes), an Int32 absolute data
/// position and an Int32 data length; the raw data follows the header.
/// Only the header is cached in memory; data is read from the memory-mapped
/// file on demand. The class keeps static state and is not synchronized.
/// </remarks>
internal class BinaryKeyStorage
{
    private const string FILE_PATH = @"data.bin";
    private static MemoryMappedFile _memoryFile;
    private static MemoryMappedViewStream _memoryFileStream;
    private static Dictionary<string, Entry> _index;

    /// <summary>Location of one value inside the file.</summary>
    private class Entry
    {
        public Entry(int position, int length)
        {
            Position = position;
            Length = length;
        }

        public int Position { get; }
        public int Length { get; }
    }

    /// <summary>Creates (or overwrites) the storage file from <paramref name="keyValues"/>.</summary>
    public static void CreateFile(Dictionary<string, byte[]> keyValues)
    {
        // 4 bytes for the entry count, plus the exact encoded size of every header entry.
        var headerLength = 4 + keyValues.Keys.Sum(dataKey => GetHeaderEntrySize(dataKey));
        var nextStartPosition = headerLength;
        using (var binaryWriter = new BinaryWriter(File.Open(FILE_PATH, FileMode.Create), Encoding.UTF8))
        {
            binaryWriter.Write(keyValues.Count);
            // writing header
            foreach (var keyValue in keyValues)
            {
                binaryWriter.Write(keyValue.Key);
                binaryWriter.Write(nextStartPosition);
                binaryWriter.Write(keyValue.Value.Length);
                nextStartPosition += keyValue.Value.Length;
            }
            // writing data, in the same order as the header entries
            foreach (var keyValue in keyValues)
            {
                binaryWriter.Write(keyValue.Value);
            }
        }
    }

    /// <summary>Returns all keys stored in the file.</summary>
    public static List<string> GetAllKeys()
    {
        InitializeIndexIfNeeded();
        return _index.Keys.ToList();
    }

    /// <summary>Reads the value stored under <paramref name="key"/>.</summary>
    /// <exception cref="KeyNotFoundException">The key is not in the index.</exception>
    /// <exception cref="EndOfStreamException">The file ends before the indexed data does.</exception>
    public static byte[] GetData(string key)
    {
        InitializeIndexIfNeeded();
        var entry = _index[key];
        _memoryFileStream.Seek(entry.Position, SeekOrigin.Begin);
        var data = new byte[entry.Length];
        // Stream.Read may deliver fewer bytes than requested, so loop until done.
        var total = 0;
        while (total < data.Length)
        {
            var read = _memoryFileStream.Read(data, total, data.Length - total);
            if (read == 0)
            {
                throw new EndOfStreamException("Storage file is shorter than its index claims.");
            }
            total += read;
        }
        return data;
    }

    /// <summary>
    /// Size in bytes of one header entry: the string length prefix as written
    /// by BinaryWriter (one byte per started 7 bits of the byte count), the
    /// UTF-8 key bytes, and 2 x 4 bytes for position and length.
    /// </summary>
    private static int GetHeaderEntrySize(string dataKey)
    {
        var byteCount = Encoding.UTF8.GetByteCount(dataKey);
        var prefixSize = 1;
        for (var remaining = byteCount >> 7; remaining != 0; remaining >>= 7)
        {
            prefixSize++;
        }
        return prefixSize + byteCount + 8;
    }

    /// <summary>Maps the file and loads the header into memory on first use.</summary>
    private static void InitializeIndexIfNeeded()
    {
        if (_index != null) return;

        // Build everything in locals and publish only on success, so a failed
        // attempt does not leave a half-initialised index behind.
        var memoryFile = MemoryMappedFile.CreateFromFile(FILE_PATH, FileMode.Open);
        MemoryMappedViewStream stream = null;
        try
        {
            stream = memoryFile.CreateViewStream();
            var index = new Dictionary<string, Entry>();
            // Same encoding as the writer; leaveOpen keeps the view stream usable afterwards.
            using (var binaryReader = new BinaryReader(stream, Encoding.UTF8, true))
            {
                var count = binaryReader.ReadInt32();
                for (var i = 0; i < count; i++)
                {
                    var dataKey = binaryReader.ReadString();
                    var dataPosition = binaryReader.ReadInt32();
                    var dataLength = binaryReader.ReadInt32();
                    index.Add(dataKey, new Entry(dataPosition, dataLength));
                }
            }

            _memoryFile = memoryFile;
            _memoryFileStream = stream;
            _index = index;
        }
        catch
        {
            if (stream != null) stream.Dispose();
            memoryFile.Dispose();
            throw;
        }
    }
}
It just caches the file header/index (the string keys together with the position/length of the data) in memory, the actual data is read directly from the memory mapped file only if needed.
Related
I am trying to convert a zip file into a text file (XML) using the following methods. It works fine for smaller files but does not seem to work for files larger than 50 MB.
/// <summary>
/// Reads a zip file and converts it to Base64-in-XML, at most
/// 20 MB of input per XML document.
/// </summary>
class Program
{
    public static void Main(string[] args)
    {
        // Exceptions are deliberately not caught and re-wrapped here:
        // "throw new Exception(ex.Message)" would discard the original type and stack trace.
        string importFilePath = @"D:\CorpTax\Tasks\966442\CS Publish error\CSUPD20180604L.zip";
        int maxLengthInMb = 20;
        byte[] payLoad = File.ReadAllBytes(importFilePath);
        int maxSliceLength = maxLengthInMb * 1024 * 1024;

        // Slices yields a single slice for payloads up to maxSliceLength, so small
        // files are converted too (previously anything under 40 MB was skipped).
        foreach (byte[] splitPayLoad in payLoad.Slices(maxSliceLength))
        {
            // Convert the current slice, not the whole payload.
            ToXml(splitPayLoad);
        }
    }

    /// <summary>
    /// Wraps <paramref name="payLoad"/> as Base64 text in a "Payload" element
    /// and returns the resulting XML document as a string.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the whole Base64 string and the whole document are built in
    /// memory; for large inputs consider streaming to the output instead.
    /// </remarks>
    public static string ToXml(byte[] payLoad)
    {
        using (XmlStringWriter xmlStringWriter = new XmlStringWriter())
        {
            xmlStringWriter.WriteStartDocument();
            xmlStringWriter.Writer.WriteStartElement("Payload");
            xmlStringWriter.Writer.WriteRaw(Convert.ToBase64String(payLoad));
            xmlStringWriter.Writer.WriteEndElement();
            xmlStringWriter.WriteEndDocument();
            return xmlStringWriter.ToString();
        }
    }
}
I have a .zip file which is like 120 MB in size and I get the
System.OutOfMemoryException when calling Convert.ToBase64String().
So I went ahead and split the byte array into a size of 20 mb chunks hoping that it will not fail. But I see that it works until it goes through the loop 3 times i.e able to convert 60mb of the data and in the 4th iteration i get the same exception. Some times I also get exceptions at the line return xmlStringWriter.ToString()
To split the byte[] I have used the following extension classes
/// <summary>Helpers for cutting arrays into fixed-size pieces.</summary>
public static class ArrayExtensions
{
    /// <summary>
    /// Copies up to <paramref name="length"/> elements starting at
    /// <paramref name="index"/> into a new array.
    /// </summary>
    /// <param name="source">Array to copy from.</param>
    /// <param name="index">Start offset, from 0 to <c>source.Length</c>.</param>
    /// <param name="length">Requested number of elements.</param>
    /// <param name="padToLength">
    /// When fewer than <paramref name="length"/> elements remain: <c>true</c>
    /// returns an array of exactly <paramref name="length"/> elements with
    /// default values at the end, <c>false</c> returns a shorter array.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> or <paramref name="length"/> is out of range.
    /// </exception>
    public static T[] CopySlice<T>(this T[] source, int index, int length, bool padToLength = false)
    {
        if (source == null) throw new ArgumentNullException(nameof(source));
        if (index < 0 || index > source.Length) throw new ArgumentOutOfRangeException(nameof(index));
        if (length < 0) throw new ArgumentOutOfRangeException(nameof(length));

        // Compare against the remaining count rather than computing index + length,
        // which could overflow for large arrays.
        int available = source.Length - index;
        int n = available < length ? available : length;

        T[] slice = new T[padToLength ? length : n];
        Array.Copy(source, index, slice, 0, n);
        return slice;
    }

    /// <summary>
    /// Lazily splits <paramref name="source"/> into consecutive slices of
    /// <paramref name="count"/> elements; the last slice may be shorter unless
    /// <paramref name="padToLength"/> is set.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="count"/> is not positive (a zero count would never advance).
    /// </exception>
    public static IEnumerable<T[]> Slices<T>(this T[] source, int count, bool padToLength = false)
    {
        // Validate eagerly; an iterator body would only run on first enumeration.
        if (source == null) throw new ArgumentNullException(nameof(source));
        if (count <= 0) throw new ArgumentOutOfRangeException(nameof(count), "Slice size must be positive.");
        return SlicesIterator(source, count, padToLength);
    }

    // Iterator part of Slices; arguments are already validated.
    private static IEnumerable<T[]> SlicesIterator<T>(T[] source, int count, bool padToLength)
    {
        int offset = 0;
        while (offset < source.Length)
        {
            yield return source.CopySlice(offset, count, padToLength);

            // Stop before advancing past the end so offset + count cannot overflow.
            if (source.Length - offset <= count) yield break;
            offset += count;
        }
    }
}
I got the above code from the following link
Splitting a byte[] into multiple byte[] arrays in C#
Funny part is the program runs fine when I run it in a console application but when I put this code into the windows application it throws the System.OutOfMemoryException.
Preferably, you want to be doing something like this:
// Chunk size must be a multiple of 3: every full chunk then encodes to Base64
// without '=' padding, so the encoded chunks can simply be concatenated.
byte[] Packet = new byte[3 * 1024];
string b64str = "";
var encoded = new System.Text.StringBuilder();
using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
{
    int i;
    do
    {
        // Fill the buffer completely unless the file ends; a single Read may
        // return fewer bytes than requested even before end of file.
        i = 0;
        int n;
        while (i < Packet.Length && (n = fs.Read(Packet, i, Packet.Length - i)) > 0)
        {
            i += n;
        }

        if (i > 0)
        {
            // Append each chunk (previously every chunk overwrote the last one).
            // For very large files, write the chunk to the output here instead
            // of collecting the whole text in memory.
            encoded.Append(Convert.ToBase64String(Packet, 0, i));
        }
    } while (i == Packet.Length);
}
b64str = encoded.ToString();
with that b64str you should create your xml data.
Also, it is typically unwise to allocate 20 MB in one go (note that arrays are allocated on the heap, not on the stack).
I am experimenting with hashtables saved on disk. When I debug and the debugger reaches POINT 1, I open the variables view and see that evaluating Data and Key throws an exception: 'this.Key' threw an exception of type 'System.NullReferenceException'. I step over to POINT 2, and now Data has the value "00-00-00-00-00-00-00-00-00-00-00-00". Where is that weird value coming from? I have also put breakpoints on the Data getter and setter, but at that point they had not been hit.
/// <summary>
/// Fixed-size table of (key, data) records stored in a file. Record i occupies
/// 16 bytes at offset 16 * i: a 4-byte key followed by 12 bytes of ASCII data.
/// </summary>
class HashtableFile : IDisposable
{
    /// <summary>
    /// View of one record in the file. The properties read and write the file
    /// directly, so evaluating them (for example in a debugger) moves the
    /// stream position.
    /// </summary>
    class Hashentry
    {
        const int KEY_BYTES = 4;
        const int DATA_BYTES = 12;

        // Start of this record; long arithmetic avoids overflow for large indexes.
        private long RecordOffset
        {
            get { return (long)(KEY_BYTES + DATA_BYTES) * Index; }
        }

        /// <summary>Key of the record; writing null stores 0.</summary>
        public int? Key
        {
            get
            {
                byte[] bytes = new byte[KEY_BYTES];
                fs.Seek(RecordOffset, SeekOrigin.Begin);
                fs.Read(bytes, 0, KEY_BYTES);
                int data = BitConverter.ToInt32(bytes, 0);
                return data;
            }
            set
            {
                int key = value.GetValueOrDefault(0);
                byte[] bytes = BitConverter.GetBytes(key);
                // Same record stride as the getter (was KEY_BYTES * Index, which
                // wrote keys into the data area of earlier records).
                fs.Seek(RecordOffset, SeekOrigin.Begin);
                fs.Write(bytes, 0, KEY_BYTES);
            }
        }

        /// <summary>
        /// Data of the record as ASCII text, padded with spaces to 12 characters;
        /// writing null stores 12 spaces.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">The value is longer than 12 characters.</exception>
        public string Data
        {
            get
            {
                byte[] bytes = new byte[DATA_BYTES];
                // skip the key part of the record
                fs.Seek(RecordOffset + KEY_BYTES, SeekOrigin.Begin);
                fs.Read(bytes, 0, DATA_BYTES);
                // Decode as text; BitConverter.ToString would yield a hex dump like "00-00-...".
                return Encoding.ASCII.GetString(bytes);
            }
            set
            {
                string data = value ?? string.Empty;
                if (data.Length > DATA_BYTES)
                {
                    throw new ArgumentOutOfRangeException("value", "Data must not be longer than " + DATA_BYTES + " characters.");
                }
                byte[] bytes = Encoding.ASCII.GetBytes(data.PadRight(DATA_BYTES, ' '));
                fs.Seek(RecordOffset + KEY_BYTES, SeekOrigin.Begin);
                fs.Write(bytes, 0, DATA_BYTES);
            }
        }

        private int Index;
        private FileStream fs;

        /// <summary>Binds the entry to record <paramref name="index"/> of <paramref name="fs"/> and writes its initial key and data.</summary>
        public Hashentry(int? key, string data, int index, FileStream fs)//POINT 1
        {
            // fs and Index must be set first: the property setters below use both.
            this.fs = fs;
            Index = index; // POINT 2
            Key = key;
            Data = data;
        }

        public int GetIndex()
        {
            return Index;
        }

        public int? GetKey()
        {
            return Key;
        }

        public string GetData()
        {
            return Data;
        }
    }

    private int maxSize;
    private Hashentry[] table;
    private string file;
    private FileStream fs;

    /// <summary>
    /// Creates <paramref name="filename"/> (replacing an existing file) and
    /// initialises <paramref name="size"/> empty records in it.
    /// </summary>
    public HashtableFile(int size, string filename)
    {
        file = filename;
        // FileMode.Create replaces an existing file, like the former delete + open.
        fs = new FileStream(file, FileMode.Create, FileAccess.ReadWrite);
        maxSize = size;
        table = new Hashentry[maxSize];
        for (int i = 0; i < maxSize; i++)
        {
            table[i] = new Hashentry(null, null, i, fs);
        }
    }

    /// <summary>Flushes pending writes and closes the underlying file.</summary>
    public void Dispose()
    {
        if (fs != null)
        {
            fs.Dispose();
            fs = null;
        }
    }
}
Look at
public int? Key
{
get
{
byte[] bytes = new byte[KEY_BYTES];
fs.Seek((KEY_BYTES + DATA_BYTES) * Index, SeekOrigin.Begin);
You see? The second line of the Key getter is using fs. If fs == null (default uninitialized value for a field of a class) then NullReferenceException if you try to use Key
BUT
public Hashentry(int? key, string data, int index, FileStream fs)//POINT 1
{
this.fs = fs;
Index = index; // POINT 2
the first line of the constructor sets the fs! So no more NullReferenceException. So at POINT 1, this.fs == null, while at POINT 2, this.fs = something
Problem was that
string data = BitConverter.ToString(bytes, 0);
converts the bytes to a string of their hexadecimal values (e.g. "00-00-00-..."), not to the characters they encode. The solution was to change this line to:
string data = Encoding.ASCII.GetString(bytes);
So I am getting a variable number of coordinates (latitude and longitude). I want to pack these coordinates in a straight forward way that can be easily sent via UDP and unpacked. How can I go about this with C#?
I'm assuming I declare a struct and then use the built in marshalling to get a byte array to send. How does one do this when there is a variable number of points involved?
Most of my programming experience has been in Python, but I need this to be done in C#, which I have limited experience with.
edit: I'll add some code I was testing out, since I feel like no one responds to just text.
namespace ConsoleApplication1
{
/// <summary>Packs a variable number of points into a byte array and sends it in one UDP datagram.</summary>
class Testing
{
    static void Main(string[] args)
    {
        StartPacket test = new StartPacket();
        test.len = 3;
        List<double> points = new List<double>();
        points.Add(3.14);
        points.Add(5);
        points.Add(-1023.1231311);
        test.points = points;

        // StartPacket holds a managed List<double>, which Marshal cannot lay out
        // ("no meaningful size or offset can be computed"), so it is packed by hand.
        byte[] data = test.ToByteArray();
        SendUdp(65456, "192.168.20.100", data);
    }

    /// <summary>Sends <paramref name="data"/> as one UDP datagram.</summary>
    /// <param name="srcPort">Port of the remote endpoint (despite the name, it is used as the destination port).</param>
    /// <param name="dstIp">IPv4 address of the receiver.</param>
    /// <param name="data">Datagram payload.</param>
    static void SendUdp(int srcPort, string dstIp, byte[] data)
    {
        using (Socket sock = new Socket(AddressFamily.InterNetwork, SocketType.Dgram,
            ProtocolType.Udp))
        {
            IPAddress dst = IPAddress.Parse(dstIp);
            IPEndPoint endPoint = new IPEndPoint(dst, srcPort);
            sock.SendTo(data, endPoint);
        }
    }

    /// <summary>Point list with an explicit count.</summary>
    public struct StartPacket
    {
        public uint len;
        public List<double> points;

        /// <summary>
        /// Serializes the packet as 4 bytes of <see cref="len"/> followed by
        /// 8 bytes per point, in the byte order BitConverter uses on this machine.
        /// A null point list is written as no points.
        /// </summary>
        public byte[] ToByteArray()
        {
            int pointCount = points == null ? 0 : points.Count;
            byte[] packed = new byte[sizeof(uint) + pointCount * sizeof(double)];
            Buffer.BlockCopy(BitConverter.GetBytes(len), 0, packed, 0, sizeof(uint));
            for (int i = 0; i < pointCount; i++)
            {
                Buffer.BlockCopy(BitConverter.GetBytes(points[i]), 0, packed,
                    sizeof(uint) + i * sizeof(double), sizeof(double));
            }
            return packed;
        }
    }

    /// <summary>
    /// Copies the unmanaged representation of <paramref name="obj"/> into a byte
    /// array. Fails for types that Marshal cannot lay out, such as structs with
    /// managed collection fields.
    /// </summary>
    public static byte[] StructureToByteArray(object obj)
    {
        int len = Marshal.SizeOf(obj);
        byte[] arr = new byte[len];
        IntPtr ptr = Marshal.AllocHGlobal(len);
        try
        {
            // fDeleteOld must be false: the freshly allocated block holds no
            // previous structure whose references could be released.
            Marshal.StructureToPtr(obj, ptr, false);
            Marshal.Copy(ptr, arr, 0, len);
        }
        finally
        {
            // Release the unmanaged block even when marshalling throws.
            Marshal.FreeHGlobal(ptr);
        }
        return arr;
    }
}
}
This code fails with:
Type 'ConsoleApplication1.Testing+StartPacket' cannot be marshaled as an unmanaged structure; no meaningful size or offset can be computed.
This seems more like a serialization/deserialization task. The simplest approach is to mark the class with Serializable then use the BinaryFormatter like this:
/// <summary>
/// Round-trips a <see cref="StartPacket"/> through <see cref="BinaryFormatter"/>:
/// object to byte array (for Socket.SendTo) and back.
/// </summary>
public class Program
{
    public static void Main(string[] args)
    {
        var startPacket = new StartPacket();
        startPacket.len = 3;
        startPacket.points = new List<double>() { 3.14, 5, -1023.1231311 };

        // serialize into a byte array for Socket.SendTo()
        byte[] bytes;
        using (var ms = new MemoryStream())
        {
            new BinaryFormatter().Serialize(ms, startPacket);
            bytes = ms.ToArray();
        }

        // assuming we received bytes[] from a socket, deserialize into StartPacket object
        // SECURITY: BinaryFormatter.Deserialize is unsafe on untrusted data, and
        // bytes received from a network socket are untrusted. BinaryFormatter is
        // also removed in .NET 9. Prefer an explicit wire format for real traffic.
        using (var ms = new MemoryStream(bytes))
        {
            startPacket = (StartPacket)new BinaryFormatter().Deserialize(ms);
        }
    }
}
/// <summary>
/// Serializable packet holding a point list and an explicit count.
/// Field names and order are kept as they are part of the serialized form.
/// </summary>
[Serializable]
public struct StartPacket
{
    /// <summary>Number of points, set by the caller.</summary>
    public uint len;

    /// <summary>The point values.</summary>
    public List<double> points;
}
However, this is not very efficient memory-wise--this example generates a 524-byte serialized StartPacket, probably due to the fact that the List<> will have a capacity of more than 3 doubles. Making points an array with the specific size only gets us down to 211 bytes. Without looking into it, I'd guess that there's a lot of overhead in the serialized version of the struct (like the names and types of variables, etc). However, if you're not too concerned about the size of the packet, then this might work for you.
If you want something more efficient, then you can add methods to StartPacket like so:
/// <summary>
/// Shows the manual packing route: a <see cref="StartPacket"/> is turned into
/// a byte array and rebuilt from it.
/// </summary>
public class Program
{
    public static void Main(string[] args)
    {
        var startPacket = new StartPacket
        {
            len = 3,
            points = new List<double> { 3.14, 5, -1023.1231311 }
        };

        // bytes as they would be handed to the socket
        byte[] arr = startPacket.ToArray();

        // packet rebuilt from bytes as they would arrive from a socket
        StartPacket newStartPacket = StartPacket.FromArray(arr);
    }
}
/// <summary>
/// Packet with a compact wire format: 4 bytes of <see cref="len"/> followed by
/// 8 bytes per point, in the byte order BitConverter uses on this machine
/// (endianness is not normalised).
/// </summary>
public struct StartPacket
{
    private const int HeaderSize = sizeof(uint);
    private const int PointSize = sizeof(double);

    /// <summary>Number of points announced in the packet header.</summary>
    public uint len;

    /// <summary>The point values.</summary>
    public List<double> points;

    /// <summary>
    /// Serializes <see cref="len"/> followed by every element of
    /// <see cref="points"/>. A null list is written as no points.
    /// </summary>
    public byte[] ToArray()
    {
        int pointCount = points == null ? 0 : points.Count;

        // Allocate the result once instead of re-copying the array per point.
        var arr = new byte[HeaderSize + pointCount * PointSize];
        Buffer.BlockCopy(BitConverter.GetBytes(len), 0, arr, 0, HeaderSize);
        for (int i = 0; i < pointCount; i++)
        {
            Buffer.BlockCopy(BitConverter.GetBytes(points[i]), 0, arr, HeaderSize + i * PointSize, PointSize);
        }
        return arr;
    }

    /// <summary>Rebuilds a packet from bytes produced by <see cref="ToArray"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="array"/> is shorter than its header announces, e.g. a
    /// truncated or corrupt datagram.
    /// </exception>
    public static StartPacket FromArray(byte[] array)
    {
        if (array == null) throw new ArgumentNullException("array");
        if (array.Length < HeaderSize)
        {
            throw new ArgumentException("Packet is too short to contain the point count.", "array");
        }

        uint count = BitConverter.ToUInt32(array, 0);

        // long arithmetic: count comes from the wire and may be arbitrarily large.
        long required = HeaderSize + (long)count * PointSize;
        if (array.Length < required)
        {
            throw new ArgumentException("Packet announces more points than it contains.", "array");
        }

        var sp = new StartPacket();
        sp.len = count;
        sp.points = new List<double>((int)count);
        for (int i = 0; i < (int)count; i++)
        {
            sp.points.Add(BitConverter.ToDouble(array, HeaderSize + i * PointSize));
        }
        return sp;
    }
}
Note that neither of these account for the endianness of the system. Hope this helps.
Is my method "ToByte()" the correct way to convert my data into
bytes, in order to be sent via Udp?
How can I do the reverse? (Watch the "received packet constructor")
Should I worry about the integrity of the received packet in order to
avoid exceptions?
public class GameDataPacket
{
private MultiplayerService.Command command_;
private string nickname_;
private float x_;
private float y_;
/// <summary>Creates an empty packet: command Nothing, nickname "noname", position (0, 0).</summary>
public GameDataPacket()
{
    command_ = MultiplayerService.Command.Nothing;
    nickname_ = "noname";
    x_ = y_ = 0.0f;
}
// Constructor for received packet.
// NOTE: this is the unfinished version from the question. The ?? markers are
// placeholders, not valid C#; the offsets cannot be filled in because the
// matching ToByte() writes the nickname without any length information.
public GameDataPacket(byte[] data)
{
// The command is the first 4 bytes of the packet.
this.command_ = (MultiplayerService.Command)BitConverter.ToInt32(data, 0);
// How to get those values from the RECEIVED packet?
this.nickname_ = Encoding.UTF8.GetString(data, ??, ??);
this.x_ = BitConverter.ToSingle(data, ??);
this.y_ = BitConverter.ToSingle(data, ??);
}
How I transform into bytes my informations:
/// <summary>
/// Serializes the packet as |Command|Name|x|y|: the command as a 4-byte int,
/// the nickname as UTF-8 bytes, then x and y as 4-byte floats.
/// The nickname is written without a length prefix or terminator.
/// </summary>
public byte[] ToByte()
{
    byte[] commandBytes = BitConverter.GetBytes((int)command_);
    byte[] nameBytes = Encoding.UTF8.GetBytes(nickname_);
    byte[] xBytes = BitConverter.GetBytes(x_);
    byte[] yBytes = BitConverter.GetBytes(y_);

    byte[] packet = new byte[commandBytes.Length + nameBytes.Length + xBytes.Length + yBytes.Length];
    int offset = 0;
    foreach (byte[] part in new[] { commandBytes, nameBytes, xBytes, yBytes })
    {
        Buffer.BlockCopy(part, 0, packet, offset, part.Length);
        offset += part.Length;
    }
    return packet;
}
Because the string could be any length you'll need to either send the length of the name, or add something to the packet that lets you detect the end of the string. Prefixing the length is probably the easiest and most straight forward.
Updated Constructor and To-bytes Method:
/// <summary>
/// Constructor for a received packet in the format
/// |Command|NameDataLength|Name|x|y| (4-byte int, 4-byte int, UTF-8 bytes,
/// 4-byte float, 4-byte float).
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The packet is too short or its nickname length does not fit the packet,
/// e.g. a truncated or corrupt datagram.
/// </exception>
public GameDataPacket(byte[] data)
{
    if (data == null) throw new ArgumentNullException("data");

    // command + nickname length + x + y, i.e. everything except the nickname bytes
    const int fixedSize = 4 + 4 + 4 + 4;
    if (data.Length < fixedSize)
    {
        throw new ArgumentException("Packet is too short.", "data");
    }

    int packetOffset = 0;
    this.command_ = (MultiplayerService.Command)BitConverter.ToInt32(data, packetOffset);
    packetOffset += 4;

    // read the length (in bytes) of the nickname
    int nickDataLength = BitConverter.ToInt32(data, packetOffset);
    packetOffset += 4;

    // The length comes from the wire: reject values that cannot fit the packet.
    if (nickDataLength < 0 || nickDataLength > data.Length - fixedSize)
    {
        throw new ArgumentException("Packet contains an invalid nickname length.", "data");
    }

    // read the nick name
    this.nickname_ = Encoding.UTF8.GetString(data, packetOffset, nickDataLength);
    packetOffset += nickDataLength;

    this.x_ = BitConverter.ToSingle(data, packetOffset);
    packetOffset += 4;
    this.y_ = BitConverter.ToSingle(data, packetOffset);
}
/// <summary>
/// Serializes the packet as |Command|NameDataLength|Name|x|y|: the command as
/// a 4-byte int, the nickname's UTF-8 byte count as a 4-byte int, the nickname
/// bytes, then x and y as 4-byte floats.
/// </summary>
public byte[] ToByte()
{
    byte[] commandBytes = BitConverter.GetBytes((int)command_);
    byte[] nicknameBytes = Encoding.UTF8.GetBytes(nickname_);
    byte[] lengthBytes = BitConverter.GetBytes(nicknameBytes.Length);
    byte[] xBytes = BitConverter.GetBytes(x_);
    byte[] yBytes = BitConverter.GetBytes(y_);

    byte[][] sections = { commandBytes, lengthBytes, nicknameBytes, xBytes, yBytes };

    int total = 0;
    foreach (byte[] section in sections)
    {
        total += section.Length;
    }

    byte[] packet = new byte[total];
    int offset = 0;
    foreach (byte[] section in sections)
    {
        Buffer.BlockCopy(section, 0, packet, offset, section.Length);
        offset += section.Length;
    }
    return packet;
}
You could use BinaryFormatter...
// Minimal example type; [Serializable] is what allows BinaryFormatter to process it.
[Serializable]
public class MyClass
{
// The only state that gets serialized.
public int MyProperty { get; set; }
}
// Sending side: turn the object into a byte array.
var myClass = new MyClass { MyProperty = 1 };
byte[] bytes = null;
using (var outgoing = new MemoryStream())
{
    new BinaryFormatter().Serialize(outgoing, myClass);
    bytes = outgoing.ToArray();
}

// Receiving side: rebuild the object from the byte array.
// Caution: BinaryFormatter should never deserialize data from an untrusted sender.
MyClass other = null;
using (var incoming = new MemoryStream(bytes))
{
    object restored = new BinaryFormatter().Deserialize(incoming);
    other = (MyClass)restored;
}
What is the best method to replace sequence of bytes in binary file to the same length of other bytes? The binary files will be pretty large, about 50 mb and should not be loaded at once in memory.
Update: I do not know location of bytes which needs to be replaced, I need to find them first.
Assuming you're trying to replace a known section of the file.
Open a FileStream with read/write access
Seek to the right place
Overwrite existing data
Sample code coming...
/// <summary>
/// Overwrites the bytes of an existing file in place, starting at
/// <paramref name="position"/>, with the contents of <paramref name="data"/>.
/// </summary>
/// <param name="filename">Path of an existing file.</param>
/// <param name="position">Byte offset at which writing starts.</param>
/// <param name="data">Bytes to write.</param>
public static void ReplaceData(string filename, int position, byte[] data)
{
    using (FileStream target = File.Open(filename, FileMode.Open))
    {
        target.Position = position;
        int byteCount = data.Length;
        target.Write(data, 0, byteCount);
    }
}
If you're effectively trying to do a binary version of string.Replace (e.g. "always replace bytes { 51, 20, 34 } with { 20, 35, 15 }"), then it's rather harder. As a quick description of what you'd do:
Allocate a buffer of at least the size of data you're interested in
Repeatedly read into the buffer, scanning for the data
If you find a match, seek back to the right place (e.g. stream.Position -= buffer.Length - indexWithinBuffer;) and overwrite the data
Sounds simple so far... but the tricky bit is if the data starts near the end of the buffer. You need to remember all potential matches and how far you've matched so far, so that if you get a match when you read the next buffer's-worth, you can detect it.
There are probably ways of avoiding this trickiness, but I wouldn't like to try to come up with them offhand :)
EDIT: Okay, I've got an idea which might help...
Keep a buffer which is at least twice as big as you need
Repeatedly:
Copy the second half of the buffer into the first half
Fill the second half of the buffer from the file
Search throughout the whole buffer for the data you're looking for
That way at some point, if the data is present, it will be completely within the buffer.
You'd need to be careful about where the stream was in order to get back to the right place, but I think this should work. It would be trickier if you were trying to find all matches, but at least the first match should be reasonably simple...
My solution :
/// <summary>
/// Copies a file to another file, replacing every occurrence of a search term
/// (compared byte-wise against its upper- and lower-case UTF-8 forms) with a
/// replacement term, and prints the number of replacements.
/// </summary>
/// <param name="originalFile">File to read.</param>
/// <param name="outputFile">File to create; an existing file is overwritten.</param>
/// <param name="searchTerm">Text to look for, ignoring case.</param>
/// <param name="replaceTerm">Text written in place of each match.</param>
/// <exception cref="ArgumentException">
/// <paramref name="searchTerm"/> is null or empty, or its upper- and lower-case
/// forms differ in UTF-8 length so they cannot be compared byte by byte.
/// </exception>
private static void ReplaceTextInBinaryFile(string originalFile, string outputFile, string searchTerm, string replaceTerm)
{
    if (string.IsNullOrEmpty(searchTerm))
    {
        throw new ArgumentException("Search term must not be empty.", "searchTerm");
    }

    // Invariant case mapping: the result must not depend on the current culture.
    byte[] searchBytes = Encoding.UTF8.GetBytes(searchTerm.ToUpperInvariant());
    byte[] searchBytesLower = Encoding.UTF8.GetBytes(searchTerm.ToLowerInvariant());
    if (searchBytes.Length != searchBytesLower.Length)
    {
        throw new ArgumentException("Upper- and lower-case forms of the search term differ in byte length.", "searchTerm");
    }

    byte[] replaceBytes = Encoding.UTF8.GetBytes(replaceTerm);
    int searchBytesLength = searchBytes.Length;

    // Sliding window over the most recent input bytes that were not written yet.
    byte[] window = new byte[searchBytesLength];
    int held = 0;
    int counter = 0;

    using (FileStream inputStream = File.OpenRead(originalFile))
    // File.Create truncates; File.OpenWrite would keep stale bytes of a longer old file.
    using (FileStream outputStream = File.Create(outputFile))
    {
        int next;
        while ((next = inputStream.ReadByte()) != -1)
        {
            window[held++] = (byte)next;
            if (held < searchBytesLength)
            {
                continue;
            }

            bool isMatch = true;
            for (int i = 0; i < searchBytesLength; i++)
            {
                if (window[i] != searchBytes[i] && window[i] != searchBytesLower[i])
                {
                    isMatch = false;
                    break;
                }
            }

            if (isMatch)
            {
                ++counter;
                outputStream.Write(replaceBytes, 0, replaceBytes.Length);
                held = 0;
            }
            else
            {
                // Only the oldest byte is certain not to start a match; the rest
                // stays in the window, so matches overlapping a failed attempt are found.
                outputStream.WriteByte(window[0]);
                Array.Copy(window, 1, window, 0, searchBytesLength - 1);
                held = searchBytesLength - 1;
            }
        }

        // Bytes still held at end of file are an incomplete match: copy them unchanged.
        outputStream.Write(window, 0, held);
    }

    Console.WriteLine("ReplaceTextInBinaryFile.counter = " + counter);
}
You can use my BinaryUtility to search and replace one or more bytes without loading the entire file into memory like this:
// Pairs of (bytes to find, bytes to write instead); the replacement may have a different length.
var searchAndReplace = new List<Tuple<byte[], byte[]>>()
{
    Tuple.Create(
        BitConverter.GetBytes((UInt32)0xDEADBEEF),
        BitConverter.GetBytes((UInt32)0x01234567)),
    Tuple.Create(
        BitConverter.GetBytes((UInt32)0xAABBCCDD),
        BitConverter.GetBytes((UInt16)0xAFFE)),
};

// Verbatim string literals (@"...") keep the backslashes of the Windows paths literal.
using (var reader =
    new BinaryReader(new FileStream(@"C:\temp\data.bin", FileMode.Open)))
using (var writer =
    new BinaryWriter(new FileStream(@"C:\temp\result.bin", FileMode.Create)))
{
    BinaryUtility.Replace(reader, writer, searchAndReplace);
}
BinaryUtility code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
/// <summary>Streaming search-and-replace on byte sequences.</summary>
public static class BinaryUtility
{
    /// <summary>Lazily yields every byte of <paramref name="reader"/>, read in 1024-byte chunks.</summary>
    public static IEnumerable<byte> GetByteStream(BinaryReader reader)
    {
        const int bufferSize = 1024;
        byte[] buffer;
        do
        {
            buffer = reader.ReadBytes(bufferSize);
            foreach (var d in buffer) { yield return d; }
        } while (bufferSize == buffer.Length); // a short chunk means end of stream
    }

    /// <summary>Copies <paramref name="reader"/> to <paramref name="writer"/>, applying every replacement pair.</summary>
    public static void Replace(BinaryReader reader, BinaryWriter writer, IEnumerable<Tuple<byte[], byte[]>> searchAndReplace)
    {
        foreach (byte d in Replace(GetByteStream(reader), searchAndReplace)) { writer.Write(d); }
    }

    /// <summary>
    /// Chains one replacement per pair; each pair operates on the output of the
    /// previous one.
    /// </summary>
    public static IEnumerable<byte> Replace(IEnumerable<byte> source, IEnumerable<Tuple<byte[], byte[]>> searchAndReplace)
    {
        foreach (var s in searchAndReplace)
        {
            source = Replace(source, s.Item1, s.Item2);
        }
        return source;
    }

    /// <summary>
    /// Lazily replaces every non-overlapping occurrence of <paramref name="from"/>
    /// in <paramref name="input"/> with <paramref name="to"/>.
    /// </summary>
    /// <remarks>
    /// <paramref name="from"/> and <paramref name="to"/> are copied up front, so
    /// any enumerable works (no reliance on IEnumerator.Reset). An empty
    /// <paramref name="from"/> returns <paramref name="input"/> unchanged.
    /// </remarks>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public static IEnumerable<byte> Replace(IEnumerable<byte> input, IEnumerable<byte> from, IEnumerable<byte> to)
    {
        if (input == null) throw new ArgumentNullException("input");
        if (from == null) throw new ArgumentNullException("from");
        if (to == null) throw new ArgumentNullException("to");

        byte[] pattern = from.ToArray();
        byte[] replacement = to.ToArray();
        if (pattern.Length == 0)
        {
            return input;
        }
        return ReplaceIterator(input, pattern, replacement);
    }

    // Knuth-Morris-Pratt matcher: after a failed partial match it falls back to the
    // longest pattern prefix that is still a suffix of the bytes seen, so matches
    // starting inside a failed attempt (e.g. 1,2 in 1,1,2) are not missed.
    private static IEnumerable<byte> ReplaceIterator(IEnumerable<byte> input, byte[] pattern, byte[] replacement)
    {
        // fail[i] = length of the longest proper prefix of pattern[0..i] that is also its suffix
        int[] fail = new int[pattern.Length];
        for (int i = 1, k = 0; i < pattern.Length; i++)
        {
            while (k > 0 && pattern[i] != pattern[k]) { k = fail[k - 1]; }
            if (pattern[i] == pattern[k]) { k++; }
            fail[i] = k;
        }

        int match = 0; // number of pattern bytes currently held back
        foreach (byte data in input)
        {
            while (match > 0 && data != pattern[match])
            {
                // The held bytes equal pattern[0..match). Those no longer covered by
                // the shorter candidate are final and can be emitted.
                int fallback = fail[match - 1];
                for (int i = 0; i < match - fallback; i++) { yield return pattern[i]; }
                match = fallback;
            }

            if (data == pattern[match])
            {
                match++;
                if (match == pattern.Length)
                {
                    foreach (byte d in replacement) { yield return d; }
                    match = 0;
                }
            }
            else
            {
                yield return data;
            }
        }

        // An incomplete match at the end of the input is passed through unchanged.
        for (int i = 0; i < match; i++) { yield return pattern[i]; }
    }
}
/// <summary>
/// Copies <paramref name="sourceFile"/> to <paramref name="targetFile"/>,
/// replacing every occurrence of <paramref name="sourceSeq"/> with
/// <paramref name="targetSeq"/>. The file is processed byte by byte and is
/// never loaded into memory as a whole.
/// </summary>
/// <param name="sourceFile">File to read.</param>
/// <param name="sourceSeq">Non-empty byte sequence to search for.</param>
/// <param name="targetFile">File to create; an existing file is overwritten.</param>
/// <param name="targetSeq">Bytes written in place of each match (any length).</param>
/// <exception cref="ArgumentNullException">A sequence is null.</exception>
/// <exception cref="ArgumentException"><paramref name="sourceSeq"/> is empty.</exception>
public static void BinaryReplace(string sourceFile, byte[] sourceSeq, string targetFile, byte[] targetSeq)
{
    if (sourceSeq == null) throw new ArgumentNullException("sourceSeq");
    if (targetSeq == null) throw new ArgumentNullException("targetSeq");
    if (sourceSeq.Length == 0) throw new ArgumentException("Search sequence must not be empty.", "sourceSeq");

    // using blocks close both streams on every path, including a failure to create the target.
    using (FileStream sourceStream = File.OpenRead(sourceFile))
    using (FileStream targetStream = File.Create(targetFile))
    {
        int b;
        long foundSeqOffset = -1;  // file offset where the current partial match began
        int searchByteCursor = 0;  // number of sourceSeq bytes matched so far

        while ((b = sourceStream.ReadByte()) != -1)
        {
            if (sourceSeq[searchByteCursor] == b)
            {
                if (searchByteCursor == sourceSeq.Length - 1)
                {
                    // full match: emit the replacement instead of the matched bytes
                    targetStream.Write(targetSeq, 0, targetSeq.Length);
                    searchByteCursor = 0;
                    foundSeqOffset = -1;
                }
                else
                {
                    if (searchByteCursor == 0)
                    {
                        foundSeqOffset = sourceStream.Position - 1;
                    }
                    ++searchByteCursor;
                }
            }
            else if (searchByteCursor == 0)
            {
                targetStream.WriteByte((byte)b);
            }
            else
            {
                // Partial match failed: only its first byte is final. Rewind to the
                // byte after it so a match starting inside the failed attempt is found.
                targetStream.WriteByte(sourceSeq[0]);
                sourceStream.Position = foundSeqOffset + 1;
                searchByteCursor = 0;
                foundSeqOffset = -1;
            }
        }

        // A partial match still pending at end of file is ordinary data: copy it
        // unchanged (previously these bytes were dropped from the output).
        if (searchByteCursor > 0)
        {
            targetStream.Write(sourceSeq, 0, searchByteCursor);
        }
    }
}