Currently, there isn't a NetworkStream.Peek method in C#. What is the best way of implementing such a method which functions just like NetworkStream.ReadByte except that the returned byte is not actually removed from the Stream?
I ran into the same 'peek for magic number and then decide which stream processor to send the stream to' requirement and unfortunately can't weasel my way out of that problem - as suggested in comments to Aaronaught's answer - by passing the already consumed bytes into the stream processing methods in separate parameters, as those methods are a given and they expect System.IO.Stream and nothing else.
I solved this by creating a more or less universal PeekableStream class that wraps a Stream. It works for NetworkStreams, but also for any other Stream, provided you Stream.CanRead it.
Edit
Alternatively, you could use the brand new ReadSeekableStream and do
var readSeekableStream = new ReadSeekableStream(networkStream, /* >= */ count);
...
readSeekableStream.Read(..., count);
readSeekableStream.Seek(-count, SeekOrigin.Current);
In any event, here comes PeekableStream:
/// <summary>
/// PeekableStream wraps a Stream and can be used to peek ahead in the underlying stream,
/// without consuming the bytes. In other words, doing Peek() will allow you to look ahead in the stream,
/// but it won't affect the result of subsequent Read() calls.
///
/// This is sometimes necessary, e.g. for peeking at the magic number of a stream of bytes and decide which
/// stream processor to hand over the stream.
/// </summary>
public class PeekableStream : Stream
{
    private readonly Stream underlyingStream;
    private readonly byte[] lookAheadBuffer; // peeked-but-not-consumed bytes live at indices [0, lookAheadIndex)
    private int lookAheadIndex;

    public PeekableStream(Stream underlyingStream, int maxPeekBytes)
    {
        this.underlyingStream = underlyingStream;
        lookAheadBuffer = new byte[maxPeekBytes];
    }

    protected override void Dispose(bool disposing)
    {
        if (disposing)
            underlyingStream.Dispose();
        base.Dispose(disposing);
    }

    /// <summary>
    /// Peeks at a maximum of count bytes, or less if the stream ends before that number of bytes can be read.
    ///
    /// Calls to this method do not influence subsequent calls to Read() and Peek().
    ///
    /// Please note that this method will always peek count bytes unless the end of the stream is reached before that - in contrast to the Read()
    /// method, which might read less than count bytes, even though the end of the stream has not been reached.
    /// </summary>
    /// <param name="buffer">An array of bytes. When this method returns, the buffer contains the specified byte array with the values between offset and
    /// (offset + number-of-peeked-bytes - 1) replaced by the bytes peeked from the current source.</param>
    /// <param name="offset">The zero-based byte offset in buffer at which to begin storing the data peeked from the current stream.</param>
    /// <param name="count">The maximum number of bytes to be peeked from the current stream.</param>
    /// <returns>The total number of bytes peeked into the buffer. If it is less than the number of bytes requested then the end of the stream has been reached.</returns>
    public virtual int Peek(byte[] buffer, int offset, int count)
    {
        if (count > lookAheadBuffer.Length)
            throw new ArgumentOutOfRangeException("count", "must be smaller than peekable size, which is " + lookAheadBuffer.Length);

        // Top up the look-ahead buffer until we hold 'count' bytes or hit end of stream.
        while (lookAheadIndex < count)
        {
            int bytesRead = underlyingStream.Read(lookAheadBuffer, lookAheadIndex, count - lookAheadIndex);
            if (bytesRead == 0) // end of stream reached
                break;
            lookAheadIndex += bytesRead;
        }

        int peeked = Math.Min(count, lookAheadIndex);
        Array.Copy(lookAheadBuffer, 0, buffer, offset, peeked);
        return peeked;
    }

    public override bool CanRead { get { return true; } }

    public override long Position
    {
        get
        {
            // Logical position: bytes pulled into the look-ahead buffer are not
            // yet consumed from the caller's perspective.
            return underlyingStream.Position - lookAheadIndex;
        }
        set
        {
            underlyingStream.Position = value;
            lookAheadIndex = 0; // this needs to be done AFTER the call to underlyingStream.Position, as that might throw NotSupportedException,
                                // in which case we don't want to change the lookAhead status
        }
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        int bytesTakenFromLookAheadBuffer = 0;
        if (count > 0 && lookAheadIndex > 0)
        {
            // Serve previously peeked bytes first.
            bytesTakenFromLookAheadBuffer = Math.Min(count, lookAheadIndex);
            Array.Copy(lookAheadBuffer, 0, buffer, offset, bytesTakenFromLookAheadBuffer);
            count -= bytesTakenFromLookAheadBuffer;
            offset += bytesTakenFromLookAheadBuffer;
            lookAheadIndex -= bytesTakenFromLookAheadBuffer;
            if (lookAheadIndex > 0) // move remaining bytes in lookAheadBuffer to front
                // BUG FIX: the remaining unconsumed bytes start at index
                // 'bytesTakenFromLookAheadBuffer'. The original source index,
                // (lookAheadBuffer.Length - bytesTakenFromLookAheadBuffer + 1),
                // pointed at the END of the array and copied garbage (or threw)
                // whenever a Read only partially drained the look-ahead buffer.
                // Copying within the same array is safe: Array.Copy handles
                // overlapping ranges as if via a temporary buffer.
                Array.Copy(lookAheadBuffer, bytesTakenFromLookAheadBuffer, lookAheadBuffer, 0, lookAheadIndex);
        }

        return count > 0
            ? bytesTakenFromLookAheadBuffer + underlyingStream.Read(buffer, offset, count)
            : bytesTakenFromLookAheadBuffer;
    }

    public override int ReadByte()
    {
        if (lookAheadIndex > 0)
        {
            lookAheadIndex--;
            byte firstByte = lookAheadBuffer[0];
            if (lookAheadIndex > 0) // move remaining bytes in lookAheadBuffer to front
                Array.Copy(lookAheadBuffer, 1, lookAheadBuffer, 0, lookAheadIndex);
            return firstByte;
        }
        else
        {
            return underlyingStream.ReadByte();
        }
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        long ret = underlyingStream.Seek(offset, origin);
        lookAheadIndex = 0; // this needs to be done AFTER the call to underlyingStream.Seek(), as that might throw NotSupportedException,
                            // in which case we don't want to change the lookAhead status
        return ret;
    }

    // from here on, only simple delegations to underlyingStream
    public override bool CanSeek { get { return underlyingStream.CanSeek; } }
    public override bool CanWrite { get { return underlyingStream.CanWrite; } }
    public override bool CanTimeout { get { return underlyingStream.CanTimeout; } }
    public override int ReadTimeout { get { return underlyingStream.ReadTimeout; } set { underlyingStream.ReadTimeout = value; } }
    public override int WriteTimeout { get { return underlyingStream.WriteTimeout; } set { underlyingStream.WriteTimeout = value; } }
    public override void Flush() { underlyingStream.Flush(); }
    public override long Length { get { return underlyingStream.Length; } }
    public override void SetLength(long value) { underlyingStream.SetLength(value); }
    public override void Write(byte[] buffer, int offset, int count) { underlyingStream.Write(buffer, offset, count); }
    public override void WriteByte(byte value) { underlyingStream.WriteByte(value); }
}
If you don't need to actually retrieve the byte, you can refer to the DataAvailable property.
Otherwise, you can wrap it with a StreamReader and invoke its Peek method.
Note that neither of these are particularly reliable for reading from a network stream, due to latency issues. The data might become available (present in the read buffer) the very instant after you peek.
I'm not sure what it is that you intend to do with this, but the Read method on NetworkStream is a blocking call, so you don't really need to check for status, even if you are receiving in chunks. If you are trying to keep the application responsive while reading from the stream, you should use a thread or asynchronous call to receive the data instead.
Edit: According to this post, StreamReader.Peek is buggy on a NetworkStream, or at least has undocumented behaviour, so be careful if you choose to go that route.
Updated - response to comments
The notion of a "peek" on the actual stream itself is actually impossible; it's just a stream, and once the byte is received then it is no longer on the stream. Some streams support seeking so you could technically re-read that byte, but NetworkStream isn't one of them.
Peeking only applies when are reading the stream into a buffer; once the data is in a buffer then peeking is easy because you just check whatever's at the current position in the buffer. That's why a StreamReader is able to do this; no Stream class will generally have its own Peek method.
Now, for this problem specifically, I question whether or not this is really the right answer. I understand the idea of dynamically selecting a method for processing the stream, but do you really need to do this on the raw stream? Can you not read the stream into a byte array first, or even copy it into a MemoryStream, and process it from that point on?
The main issue I see is that if something bad happens when you're reading from a network stream, the data is gone. But if you read it into a temporary location first, you can debug this. You can find out what the data was and why the object that was trying to process the data failed halfway through.
In general, the very first thing you want to do with a NetworkStream is read it into a local buffer. The only reason I can think of not to do this is if you're reading an enormous amount of data - and even then, I might consider using the file system as an intermediate buffer if it won't fit in memory.
I don't know your exact requirements, but from what I've learned so far, my advice would be: Don't try to process your data directly from the NetworkStream unless there is a compelling reason to do so. Consider reading the data into memory or onto disk first, then processing the copy.
If you have access to the Socket object, you could try the Receive method, passing SocketFlags.Peek. This is analogous to the MSG_PEEK flag that can be passed to the recv call in BSD Sockets or Winsock.
Here is a very simple PeekStream implementation that allows you to peek a certain number of bytes at the start of the stream only (as opposed to being able to peek at any time). The peeked bytes are returned as a Stream themselves, to minimize changes to existing code.
Here's how you use it:
Stream nonSeekableStream = ...;
PeekStream peekStream = new PeekStream(nonSeekableStream, 30); // Peek max 30 bytes
Stream initialBytesStream = peekStream.GetInitialBytesStream();
ParseHeaders(initialBytesStream); // Work on initial bytes of nonSeekableStream
peekStream.Read(...) // Read normally, the read will start from the beginning
GetInitialBytesStream() returns a seekable stream that contains up to peekSize initial bytes of the underlying stream (less if the stream is shorter than peekSize).
Because of its simplicity, reading PeekStream should only be marginally slower (if at all) than reading underlying stream directly.
/// <summary>
/// Wraps a stream and pre-reads up to peekSize initial bytes, which remain
/// available via GetInitialBytesStream() while Read() still delivers the
/// underlying stream from its very beginning. Peeking is only possible at the
/// start of the stream, not at arbitrary positions.
/// </summary>
public class PeekStream : Stream
{
    private Stream m_stream;  // underlying stream
    private byte[] m_buffer;  // pre-read initial bytes
    private int m_start;      // read cursor within m_buffer
    private int m_end;        // number of valid bytes in m_buffer

    public PeekStream(Stream stream, int peekSize)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }
        if (!stream.CanRead)
        {
            throw new ArgumentException("Stream is not readable.");
        }
        if (peekSize < 0)
        {
            throw new ArgumentOutOfRangeException("peekSize");
        }
        m_stream = stream;
        m_buffer = new byte[peekSize];
        // BUG FIX: Stream.Read may legally return fewer bytes than requested
        // even before end of stream (typical for NetworkStream), so a single
        // Read could under-fill the peek buffer. Loop until the buffer is full
        // or the stream actually ends.
        int bytesRead;
        while (m_end < peekSize && (bytesRead = stream.Read(m_buffer, m_end, peekSize - m_end)) > 0)
        {
            m_end += bytesRead;
        }
    }

    public override bool CanRead
    {
        get
        {
            return true;
        }
    }

    public override bool CanWrite
    {
        get
        {
            return false;
        }
    }

    public override bool CanSeek
    {
        get
        {
            return false;
        }
    }

    public override long Length
    {
        get
        {
            throw new NotSupportedException();
        }
    }

    public override long Position
    {
        get
        {
            throw new NotSupportedException();
        }
        set
        {
            throw new NotSupportedException();
        }
    }

    /// <summary>
    /// Returns a read-only, seekable stream over the initial bytes (up to
    /// peekSize; fewer if the underlying stream was shorter).
    /// </summary>
    public MemoryStream GetInitialBytesStream()
    {
        return new MemoryStream(m_buffer, 0, m_end, false);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        // Validate arguments
        if (buffer == null)
        {
            throw new ArgumentNullException("buffer");
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException("offset");
        }
        if (count < 0)
        {
            // BUG FIX: a negative count previously slipped past the
            // (offset + count > buffer.Length) check and failed inside
            // Array.Copy instead of being reported as a bad argument.
            throw new ArgumentOutOfRangeException("count");
        }
        if (offset + count > buffer.Length)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        int totalRead = 0;
        // Serve any not-yet-consumed initial (peeked) bytes first
        if (m_start < m_end)
        {
            int toRead = Math.Min(m_end - m_start, count);
            Array.Copy(m_buffer, m_start, buffer, offset, toRead);
            m_start += toRead;
            offset += toRead;
            count -= toRead;
            totalRead += toRead;
        }
        // Then read the remainder straight from the underlying stream
        if (count > 0)
        {
            totalRead += m_stream.Read(buffer, offset, count);
        }
        // Return total bytes read
        return totalRead;
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        // NotSupportedException is the Stream convention for an unwritable
        // stream (the original threw NotImplementedException, which suggests
        // "not done yet" rather than "not supported").
        throw new NotSupportedException();
    }

    public override int ReadByte()
    {
        if (m_start < m_end)
        {
            return m_buffer[m_start++];
        }
        else
        {
            return m_stream.ReadByte();
        }
    }

    public override void Flush()
    {
        m_stream.Flush();
    }

    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            m_stream.Dispose();
        }
        base.Dispose(disposing);
    }
}
Disclaimer: PeekStream above is taken from a working program, but it's not comprehensively tested, so it may contain bugs. It works for me, but you might uncover some corner cases where it fails.
FWIW, here is a peekable stream over a non-seekable one, optimized for just one byte ahead:
/// <summary>
/// Wraps a (possibly non-seekable) stream and adds the ability to examine the
/// next byte via PeekByte() without consuming it from subsequent reads.
/// Deliberately optimized for exactly one byte of look-ahead.
/// </summary>
public class OneBytePeekableStream : Stream
{
    private readonly bool _disposeStreamOnDispose;
    private readonly Stream _stream;
    private int _pending;      // the peeked value: a byte, or -1 once end of stream was observed
    private int _pendingCount; // 0 = nothing buffered, 1 = one peeked value buffered

    public OneBytePeekableStream(Stream stream, bool disposeStreamOnDispose)
    {
        if (stream == null)
            throw new ArgumentNullException(nameof(stream));
        _stream = stream;
        _disposeStreamOnDispose = disposeStreamOnDispose;
    }

    public override long Length { get { return _stream.Length; } }
    public override bool CanRead { get { return _stream.CanRead; } }
    public override bool CanSeek { get { return _stream.CanSeek; } }
    public override bool CanWrite { get { return _stream.CanWrite; } }
    public override bool CanTimeout { get { return _stream.CanTimeout; } }
    public override int ReadTimeout { get { return _stream.ReadTimeout; } set { _stream.ReadTimeout = value; } }
    public override int WriteTimeout { get { return _stream.WriteTimeout; } set { _stream.WriteTimeout = value; } }

    public override long Position
    {
        // Subtract the buffered byte so callers see the logical (unconsumed) position.
        get { return _stream.Position - _pendingCount; }
        set { _stream.Position = value; _pendingCount = 0; }
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
            throw new ArgumentNullException(nameof(buffer));
        if (offset < 0)
            throw new ArgumentOutOfRangeException(nameof(offset));
        if (count < 0)
            throw new ArgumentOutOfRangeException(nameof(count));
        if (buffer.Length - offset < count)
            throw new ArgumentOutOfRangeException(nameof(count));
        if (count == 0)
            return 0;

        // Nothing peeked: plain pass-through to the wrapped stream.
        if (_pendingCount == 0)
            return _stream.Read(buffer, offset, count);

        // A buffered -1 means a previous peek already hit end of stream.
        if (_pending < 0)
            return 0;

        // Deliver the buffered byte first, then top up from the stream.
        _pendingCount = 0;
        buffer[offset] = (byte)_pending;
        if (count == 1)
            return 1;
        return 1 + _stream.Read(buffer, offset + 1, count - 1);
    }

    // this is the sole reason of this class
    /// <summary>Returns the next byte without consuming it, or -1 at end of stream.</summary>
    public virtual int PeekByte()
    {
        if (_pendingCount > 0)
            return _pending;
        _pending = _stream.ReadByte();
        _pendingCount = 1;
        return _pending;
    }

    public override int ReadByte()
    {
        if (_pendingCount == 0)
            return _stream.ReadByte();
        if (_pending < 0)
            return -1;
        _pendingCount = 0;
        return _pending;
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        // Only discard the buffered byte after the seek succeeded (Seek may
        // throw NotSupportedException on a non-seekable stream).
        long result = _stream.Seek(offset, origin);
        _pendingCount = 0;
        return result;
    }

    public override void Flush()
    {
        _stream.Flush();
    }

    public override void SetLength(long value)
    {
        _stream.SetLength(value);
    }

    public override void WriteByte(byte value)
    {
        _stream.WriteByte(value);
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        _stream.Write(buffer, offset, count);
    }

    protected override void Dispose(bool disposing)
    {
        if (disposing && _disposeStreamOnDispose)
        {
            _stream.Dispose();
        }
        base.Dispose(disposing);
    }
}
Related
I want to use a third party dll function which requires a stream input.
The data I need to feed it is provided by a different third party dll function, which only offers access to the source data by using a 'ReadBuffer' option to obtain chunks of data at a time, by populating a byte array of a set length.
The data I'm reading exceeds several TB, so I'm unable to just write a loop and write all the data to memory and then into a stream.
Is their a simple way to create a stream from data which is being read into a byte array buffer within a while loop as the stream is read?
I'm writing in C# & thanks for any pointers
Thanks
You should inherit from the Stream class. It is possible your library exposes everything your derived class needs from an inner stream (current position, data length, whether it can be read, etc.).
This way you can create a new object and use in the other library
// Adapter that exposes a chunk-based third-party reader (LibraryClient) as a
// standard System.IO.Stream, so the data can be fed to APIs expecting a Stream
// without materializing the multi-TB payload in memory.
public class MyStream : Stream
{
    private LibraryClient _client;

    // Wrapper around the library's client. (The original constructor also
    // referenced undeclared 'buffer'/'bufferSize' variables and did not compile.)
    public MyStream(LibraryClient libraryClient)
    {
        _client = libraryClient;
    }

    // Total length as reported by the client.
    public override long Length => _client.DataLength;

    // Current absolute position within the client's data.
    public override long Position { get; set; }

    public override int Read(byte[] buffer, int offset, int count)
    {
        // 'offset' is where to STORE the data in 'buffer'; it does not reduce
        // the number of bytes requested. The original allocated (count - offset)
        // bytes and returned (count - offset), corrupting any read with offset > 0.
        var tmp = new byte[count];
        // NOTE(review): assumes ReadBuffer(array, position, count) fills the
        // array starting from the given absolute position - confirm against
        // the library's documentation.
        _client.ReadBuffer(tmp, Position, count);
        Array.Copy(tmp, 0, buffer, offset, count);
        // Advance the stream position past the bytes just delivered; the
        // original never advanced it, so every Read returned the same data.
        Position += count;
        return count; // if ReadBuffer reports how many bytes it actually read, return that instead
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        // Translate the origin-relative offset into an absolute position.
        long tempPosition = 0;
        switch (origin)
        {
            case SeekOrigin.Begin:
                tempPosition = offset;
                break;
            case SeekOrigin.Current:
                tempPosition = Position + offset;
                break;
            case SeekOrigin.End:
                tempPosition = _client.DataLength + offset;
                break;
        }
        if (tempPosition < 0) throw new IOException("Offset too backward");
        if (tempPosition > _client.DataLength) throw new IOException("Offset too foward");
        Position = tempPosition;
        // Stream.Seek must return the NEW absolute position; the original
        // returned the raw 'offset' argument.
        return Position;
    }

    public override void SetLength(long value)
    {
        // The data length is fixed by the client; this stream is read-only.
        throw new NotSupportedException();
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        // The original silently swallowed writes; failing loudly prevents
        // silent data loss on a stream that advertises CanWrite == false.
        throw new NotSupportedException();
    }

    public override void Flush()
    {
        // Nothing is buffered for writing; intentionally a no-op.
    }

    public override bool CanWrite => false;
    public override bool CanSeek => true;
    public override bool CanRead => Position < Length;
}
What are the possible causes for Stream not Writable Exception when serializing custom object over TCP using Network Stream in C#.
I am sending the MP3 data in the form of packets. The frame consists of a Byte[] buffer. I am using BinaryFormatter to serialize the object.
BinaryFormatter.Serialize(NetworkStream,Packet);
The MP3 played at the client end with distortion and jitter for a few seconds, and then the above-mentioned exception was raised. I am using the NAudio open-source library for this.
Before doing this modification I was using
NetworkStream.Write(Byte[] Buffer,0,EncodedSizeofMp3);
and it was writing it successfully before giving any exception
If you are writing to a NetworkStream, the stream/socket could be closed
If you are writing to a NetworkStream, it could have been created with FileAccess.Read
If I had to guess, though, it sounds like something is closing the stream - this can be the case if, say, a "writer" along the route assumes it owns the stream, so closes the stream prematurely. It is pretty common to have to write and use some kind of wrapper Stream that ignores Close() requests (I have one in front of me right now, in fact, since I'm writing some TCP code).
As a small aside; I generally advise against BinaryFormatter for comms (except remoting) - most importantly: it doesn't "version" in a very friendly way, but it also tends to be a bit verbose in most cases.
Here's the wrapper I'm using currently, in case it helps (the Reset() method spoofs resetting the position, so the caller can read a relative position):
// Pass-through wrapper that hides seeking, never closes the wrapped stream
// (Dispose is not overridden, so the tail survives this wrapper's disposal),
// and tracks a relative position that Reset() can spoof back to zero so the
// caller can read a relative offset.
class NonClosingNonSeekableStream : Stream
{
    private long position;
    private readonly Stream tail;

    public NonClosingNonSeekableStream(Stream tail)
    {
        if (tail == null) throw new ArgumentNullException("tail");
        this.tail = tail;
    }

    public override bool CanRead => tail.CanRead;
    public override bool CanWrite => tail.CanWrite;
    public override bool CanSeek => false;
    public override bool CanTimeout => false;

    public override long Position
    {
        get => position;
        set => throw new NotSupportedException();
    }

    public override void Flush() => tail.Flush();

    public override void SetLength(long value) => throw new NotSupportedException();

    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();

    public override long Length => throw new NotSupportedException();

    public override int Read(byte[] buffer, int offset, int count)
    {
        // Track only bytes actually obtained from the tail.
        var got = tail.Read(buffer, offset, count);
        if (got > 0) position += got;
        return got;
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        tail.Write(buffer, offset, count);
        if (count > 0) position += count;
    }

    public override int ReadByte()
    {
        var b = tail.ReadByte();
        if (b >= 0) position++;
        return b;
    }

    public override void WriteByte(byte value)
    {
        tail.WriteByte(value);
        position++;
    }

    // Spoofs resetting the position so the caller can read a relative offset.
    public void Reset() => position = 0;
}
I have a WCF service that streams large files (100MB+) from a remote FTP server.
// WCF contract for streaming file data retrieved from a remote FTP server.
[ServiceContract]
public interface IStreamService
{
    // NOTE(review): returning a raw Stream from an operation requires the
    // binding to be configured for streamed transfer - confirm in the config.
    [OperationContract]
    Stream GetDataFromFtp();
}
// Service implementation: opens an FTP download stream and hands it straight
// back so WCF streams it to the client without buffering the whole file.
public class StreamService : IStreamService
{
    public Stream GetDataFromFtp()
    {
        // Retr(...) is pseudo-code for issuing an FTP RETR command and
        // returning the response stream; the real arguments are elided.
        Stream ftpStream = Retr(...);
        return ftpStream;
    }
}
As WCF streams the file to the client, I want to stream it into a local cache, so that future requests don't need to go all the way back to the remote FTP - I can just serve them off disk.
I'm having problems achieving this without buffering the entire 100MB file stream in memory before returning it.
I tried using a simple capturing stream wrapper that does a write for each read:
// Attempted tee-wrapper: every chunk read from 'stream' is also written to
// 'captureStream' (the on-disk cache file).
// NOTE(review): as shown this class is incomplete - it has no constructor to
// assign the readonly fields and does not override the abstract members of
// Stream (CanRead, CanSeek, CanWrite, Length, Position, Flush, Seek,
// SetLength, Write), so it will not compile as-is; it illustrates the idea only.
public class CapturingStreamWrapper : Stream
{
    private readonly Stream stream;
    private readonly Stream captureStream;

    public override int Read(byte[] buffer, int offset, int count)
    {
        // Read from the source, then mirror exactly the bytes obtained into
        // the capture stream before handing them to the caller.
        int readBytes = stream.Read(buffer, offset, count);
        captureStream.Write(buffer, offset, readBytes);
        return readBytes;
    }
}
// Intended usage: wrap the FTP stream so that serving it to the WCF client
// simultaneously writes a copy into a cache file on disk.
public class StreamService : IStreamService
{
    public Stream GetDataFromFtp()
    {
        // Both calls are pseudo-code with arguments elided.
        Stream ftpStream = Retr(...);
        Stream cacheStream = File.OpenWrite(...);
        // NOTE(review): the CapturingStreamWrapper shown earlier declares no
        // two-argument constructor; this snippet illustrates intent only.
        return new CapturingStreamWrapper(ftpStream, cacheStream);
    }
}
But this didn't seem to work.
Additionally this doesn't provide any error handling - I need a catch block for the cache to delete any half-written file if the client transfer fails (i.e. transactional cache). I'm not sure how this would work because I don't know when streams are called/cleaned up in the WCF lifecycle.
Any ideas how I can stream into a file as I stream back to the client?
I ended up writing a couple of interconnected stream classes - one that pipes to the other as it reads. Apologies for the large code paste:
/// <summary>
/// A stream that, as it reads, makes those bytes available on an output
/// stream (see <see cref="OutputStream"/>). Thread safe.
/// </summary>
public class CacheStream : Stream
{
    private readonly Stream source;

    public CacheStream(Stream stream)
    {
        if (stream == null) throw new ArgumentNullException("stream");
        this.source = stream;
        OutputStream = new CacheOutputStream(this);
    }

    // Raised with a private copy of every chunk of bytes read from the source.
    public event EventHandler<BytesReadEventArgs> BytesRead = delegate { };
    // Raised once when this stream is closed.
    public event EventHandler Closing = delegate { };

    public Stream OutputStream { get; private set; }

    public override void Flush() => source.Flush();

    public override long Seek(long offset, SeekOrigin origin) =>
        throw new InvalidOperationException("Cannot seek in CachingStream.");

    public override void SetLength(long value) => source.SetLength(value);

    public override int Read(byte[] buffer, int offset, int count)
    {
        var obtained = source.Read(buffer, offset, count);
        if (obtained > 0)
        {
            // Hand subscribers a private copy so later reads can't clobber it.
            var chunk = new byte[obtained];
            Array.Copy(buffer, offset, chunk, 0, obtained);
            BytesRead(this, new BytesReadEventArgs(chunk));
        }
        return obtained;
    }

    public override void Write(byte[] buffer, int offset, int count) =>
        throw new InvalidOperationException("Cannot write in CachingStream.");

    public override bool CanRead => source.CanRead;
    public override bool CanSeek => false;
    public override bool CanWrite => false;
    public override long Length => source.Length;

    public override long Position
    {
        get => source.Position;
        set => throw new InvalidOperationException("Cannot set position in CachingStream.");
    }

    public override void Close()
    {
        // Notify the output side that no further chunks will arrive.
        Closing(this, EventArgs.Empty);
        base.Close();
    }

    protected override void Dispose(bool disposing)
    {
        base.Dispose(disposing);
        OutputStream.Dispose();
    }
}
And
/// <summary>
/// Output portion of CacheStream. Streams bytes from a queue of buffers.
/// Thread safe.
/// </summary>
public class CacheOutputStream : Stream
{
    // Read cursor and total queued byte count.
    // NOTE(review): volatile gives cross-thread visibility, but the '+='
    // updates below are not atomic - this looks safe only with one producer
    // thread and one consumer thread; confirm the usage pattern.
    private volatile int position;
    private volatile int length;
    private volatile bool sourceIsClosed;
    // No Deque<T> in the BCL yet, but LinkedList is more or less the same.
    private readonly LinkedList<byte[]> buffers = new LinkedList<byte[]>();

    public CacheOutputStream(CacheStream stream)
    {
        if (stream == null) throw new ArgumentNullException("stream");
        // Wire up to the producing CacheStream: every chunk it reads gets
        // queued here, and its Close() marks this side as finished.
        stream.BytesRead += (o, e) => AddToQueue(e.Buffer);
        stream.Closing += (o, e) => sourceIsClosed = true;
    }

    // Producer side: append a chunk (invoked from CacheStream's BytesRead event).
    private void AddToQueue(byte[] buffer)
    {
        if (buffer.Length == 0)
            return;
        lock (buffers)
        {
            buffers.AddLast(buffer);
            length += buffer.Length;
        }
    }

    // Consumer side: returns 0 only once the source is closed and the queue is
    // drained; otherwise blocks (polling every 50 ms) until a chunk arrives.
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null) throw new ArgumentNullException("buffer");
        bool noMoreBuffersAvailable = HasNoMoreBuffersAvailable();
        // Guard clause - closed and nothing more to write.
        if (noMoreBuffersAvailable && sourceIsClosed)
            return 0;
        if (noMoreBuffersAvailable)
        {
            // Not closed yet! Block infinitely until we get closed or have some data.
            while (HasNoMoreBuffersAvailable())
            {
                if (sourceIsClosed)
                    return 0;
                Thread.Sleep(TimeSpan.FromMilliseconds(50));
            }
        }
        byte[] currentBuffer = GetCurrentBuffer();
        int numberOfBytesRead = DoRead(buffer, count, currentBuffer, offset);
        PutLeftoverBytesAtFrontOfQueue(currentBuffer, numberOfBytesRead);
        return numberOfBytesRead;
    }

    // Check if caller didn't have enough space to fit the buffer.
    // Put the remaining bytes at the front of the queue.
    private void PutLeftoverBytesAtFrontOfQueue(byte[] currentBuffer, int numberOfBytesRead)
    {
        if (currentBuffer == null) throw new ArgumentNullException("currentBuffer");
        if (numberOfBytesRead == currentBuffer.Length)
            return; // Clean read!
        var remainingBuffer = new byte[currentBuffer.Length - numberOfBytesRead];
        Array.Copy(currentBuffer, numberOfBytesRead, remainingBuffer, 0, remainingBuffer.Length);
        lock (buffers)
            buffers.AddFirst(remainingBuffer);
    }

    // Copy as much of currentBuffer as the caller requested.
    // NOTE(review): Math.Min(count, currentBuffer.Length) does not also bound
    // by the destination's remaining space (buffer.Length - offset) - confirm
    // callers always pass a buffer large enough for 'count'.
    private int DoRead(byte[] buffer, int count, byte[] currentBuffer, int offset)
    {
        int maxNumberOfBytesWeCanWrite = Math.Min(count, currentBuffer.Length);
        Array.Copy(currentBuffer, 0, buffer, offset, maxNumberOfBytesWeCanWrite);
        position += maxNumberOfBytesWeCanWrite;
        return maxNumberOfBytesWeCanWrite;
    }

    // Dequeue the next chunk; caller must have verified availability first
    // (buffers.First would be null on an empty list).
    private byte[] GetCurrentBuffer()
    {
        byte[] currentBuffer;
        lock (buffers)
        {
            currentBuffer = buffers.First.Value;
            buffers.RemoveFirst();
        }
        return currentBuffer;
    }

    private bool HasNoMoreBuffersAvailable()
    {
        lock (buffers)
            return buffers.Count == 0;
    }

    // Read-only stream: nothing to flush.
    public override void Flush() { }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new InvalidOperationException("Cannot seek in CachingStream.");
    }

    public override void SetLength(long value)
    {
        throw new InvalidOperationException("Cannot set length in CachingStream.");
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new InvalidOperationException("Cannot write in a CachingStream.");
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override bool CanWrite
    {
        get { return false; }
    }

    // Total bytes queued so far (not the bytes remaining to be read).
    public override long Length
    {
        get { return length; }
    }

    // Total bytes handed out to readers so far.
    public override long Position
    {
        get { return position; }
        set { throw new InvalidOperationException("Cannot set position in CachingStream."); }
    }
}
I'm using a library that requires I provide an object that implements this interface:
/// <summary>
/// Abstraction of a console's three standard streams, as required by the
/// third-party library (which reads from StandardOutput/StandardError and
/// writes to StandardInput).
/// </summary>
public interface IConsole {
    TextWriter StandardInput { get; }
    TextReader StandardOutput { get; }
    TextReader StandardError { get; }
}
The object's readers then get used by the library with:
IConsole console = new MyConsole();
int readBytes = console.StandardOutput.Read(buffer, 0, buffer.Length);
Normally the class implementing IConsole has the StandardOutput stream as coming from an external process. In that case the console.StandardOutput.Read calls work by blocking until there is some data written to the StandardOutput stream.
What I'm trying to do is create a test IConsole implementation that uses MemoryStreams and echo's whatever appears on the StandardInput back onto the StandardInput. I tried:
MemoryStream echoOutStream = new MemoryStream();
StandardOutput = new StreamReader(echoOutStream);
But the problem with that is the console.StandardOutput.Read will return 0 rather than block until there is some data. Is there anyway I can get a MemoryStream to block if there is no data available or is there a different in memory stream I could use?
Inspired by your answer, here's my multi-thread, multi-write version:
/// <summary>
/// Blocking in-memory echo stream supporting multiple queued writes: each
/// Write enqueues a chunk and Read blocks until at least one chunk is
/// available. Single-reader, multi-writer.
/// </summary>
public class EchoStream : MemoryStream
{
    private readonly ManualResetEvent _DataReady = new ManualResetEvent(false);
    private readonly ConcurrentQueue<byte[]> _Buffers = new ConcurrentQueue<byte[]>();

    /// <summary>True when at least one written chunk has not yet been consumed.</summary>
    public bool DataAvailable { get { return !_Buffers.IsEmpty; } }

    public override void Write(byte[] buffer, int offset, int count)
    {
        // BUG FIX: queue a private copy of the requested slice. The original
        // enqueued the caller's array itself and ignored offset/count, so the
        // caller could not safely reuse its buffer and partial writes were wrong.
        var chunk = new byte[count];
        Array.Copy(buffer, offset, chunk, 0, count);
        _Buffers.Enqueue(chunk);
        _DataReady.Set();
    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        _DataReady.WaitOne();
        byte[] lBuffer;
        if (!_Buffers.TryDequeue(out lBuffer))
        {
            _DataReady.Reset();
            // BUG FIX: Stream.Read must report "no bytes" as 0, never -1.
            return 0;
        }
        if (!DataAvailable)
            _DataReady.Reset();
        // BUG FIX: honor the caller's offset and count. The original copied the
        // whole chunk to index 0, overrunning a destination smaller than the
        // chunk. Bytes of the chunk beyond 'count' are dropped (as before).
        int copied = Math.Min(count, lBuffer.Length);
        Array.Copy(lBuffer, 0, buffer, offset, copied);
        return copied;
    }
}
With your version you had to Read the stream after each Write, without any consecutive writes being possible. My version buffers every written buffer in a ConcurrentQueue (it's fairly simple to change it to a plain Queue and lock it).
In the end I found an easy way to do it by inheriting from MemoryStream and taking over the Read and Write methods.
// Minimal blocking echo stream: Write stores ONE pending buffer and wakes a
// blocked reader; Read blocks until something has been written.
// NOTE(review): holds only a single pending write - a second Write before the
// matching Read silently overwrites the first. The Reset()-then-WaitOne in
// Read can also swallow a Set() raised between the null check and the Reset.
// Looks intended for strictly alternating single-producer/single-consumer
// test use only - confirm before reusing. Bytes of a written chunk beyond
// min(count, m_count) are dropped on Read.
public class EchoStream : MemoryStream {
    private ManualResetEvent m_dataReady = new ManualResetEvent(false);
    // The single pending write: the caller's array (not a copy) and its span.
    private byte[] m_buffer;
    private int m_offset;
    private int m_count;

    public override void Write(byte[] buffer, int offset, int count) {
        m_buffer = buffer;
        m_offset = offset;
        m_count = count;
        m_dataReady.Set();
    }

    public override int Read(byte[] buffer, int offset, int count) {
        if (m_buffer == null) {
            // Block until the stream has some more data.
            m_dataReady.Reset();
            m_dataReady.WaitOne();
        }
        // Copy at most 'count' bytes of the pending chunk; any remainder is lost.
        Buffer.BlockCopy(m_buffer, m_offset, buffer, offset, (count < m_count) ? count : m_count);
        m_buffer = null;
        return (count < m_count) ? count : m_count;
    }
}
I'm going to add one more refined version of EchoStream. This is a combination of the other two versions, plus some suggestions from the comments.
UPDATE - I have tested this EchoStream with over 50 terabytes of data run through it for days on end. The test had it sitting between a network stream and the ZStandard compression stream. The async path has also been tested, which brought a rare hanging condition to the surface. It appears the built-in System.IO.Stream does not expect one to call both ReadAsync and WriteAsync on the same stream at the same time, which can cause it to hang if there isn't any data available, because both calls utilize the same internal variables. Therefore I had to override those functions, which resolved the hanging issue.
This version has the following enhancements:
This was written from scratch using the System.IO.Stream base class instead of MemoryStream.
The constructor can set a max queue depth and if this level is reached then stream writes will block until a Read is performed which drops the queue depth back below the max level (no limit=0, default=10).
When reading/writing data, the buffer offset and count are now honored. Also, you can call Read with a smaller buffer than Write without throwing an exception or losing data. BlockCopy is used in a loop to fill in the bytes until count is satisfied.
There is a public property called AlwaysCopyBuffer, which makes a copy of the buffer in the Write function. Setting this to true will safely allow the byte buffer to be reused after calling Write.
There is a public property called ReadTimeout/WriteTimeout, which controls how long the Read/Write function will block before it returns 0 (default=Infinite, -1).
The BlockingCollection<> class is used, which under the hood combines the ConcurrentQueue and AutoResetEvent classes. Originally I was using these two classes directly, but there exists a rare condition where you will find that after data has been Enqueued(), it is not available immediately when AutoResetEvent allows a thread through in Read(). This happens about once every 500GB of data that passes through it. The cure was to Sleep and check for the data again. Sometimes a Sleep(0) worked, but in extreme cases where the CPU usage was high, it could be as high as Sleep(1000) before the data showed up. After I switched to BlockingCollection<>, it has a lot of extra code to handle all of this elegantly and without issues.
This has been tested to be thread safe for simultaneous async reads and writes.
using System;
using System.IO;
using System.Threading.Tasks;
using System.Threading;
using System.Collections.Concurrent;
/// <summary>
/// A producer/consumer stream: bytes written with Write() are queued and handed back,
/// in order, to Read() calls. Intended to sit between a producer stream (e.g. a network
/// stream) and a consumer (e.g. a compression stream) on different threads.
/// </summary>
public class EchoStream : Stream
{
    public override bool CanTimeout { get; } = true;

    /// <summary>How long Read() blocks waiting for data before returning 0 (default: Timeout.Infinite).</summary>
    public override int ReadTimeout { get; set; } = Timeout.Infinite;

    /// <summary>How long Write() blocks when the queue is at max depth before throwing TimeoutException (default: Timeout.Infinite).</summary>
    public override int WriteTimeout { get; set; } = Timeout.Infinite;

    public override bool CanRead { get; } = true;
    public override bool CanSeek { get; } = false;
    public override bool CanWrite { get; } = true;

    /// <summary>When true, Write() always copies the caller's buffer, so the caller may safely reuse it.</summary>
    public bool CopyBufferOnWrite { get; set; } = false;

    private readonly object _lock = new object();

    // Default underlying mechanism for BlockingCollection is ConcurrentQueue<T>, which is what we want
    private readonly BlockingCollection<byte[]> _Buffers;
    private int _maxQueueDepth = 10;

    // Partially consumed chunk carried over between Read() calls (access guarded by _lock).
    private byte[] m_buffer = null;
    private int m_offset = 0;
    private int m_count = 0;

    private bool m_Closed = false;
    private bool m_FinalZero = false; //after the stream is closed, set to true after returning a 0 for read()

    public override void Close()
    {
        m_Closed = true;

        // release any waiting writes
        _Buffers.CompleteAdding();
    }

    /// <summary>True if at least one unread chunk is currently queued.</summary>
    public bool DataAvailable
    {
        get { return _Buffers.Count > 0; }
    }

    private long _Length = 0L;

    /// <summary>Total number of bytes ever written to this stream.</summary>
    public override long Length
    {
        get { return _Length; }
    }

    private long _Position = 0L;

    /// <summary>Total number of bytes ever read from this stream. Not seekable, so the setter throws.</summary>
    public override long Position
    {
        get { return _Position; }
        set { throw new NotImplementedException(); }
    }

    public EchoStream() : this(10)
    {
    }

    /// <param name="maxQueueDepth">Maximum number of queued Write() chunks before writers block; 0 (or less) means unlimited.</param>
    public EchoStream(int maxQueueDepth)
    {
        _maxQueueDepth = maxQueueDepth;

        // BUG FIX: the documented "no limit = 0" previously threw ArgumentOutOfRangeException,
        // because BlockingCollection requires boundedCapacity >= 1. Use the unbounded
        // constructor for depths < 1.
        _Buffers = maxQueueDepth > 0
            ? new BlockingCollection<byte[]>(maxQueueDepth)
            : new BlockingCollection<byte[]>();
    }

    // We replace the xxxxAsync functions because the default base class shares state between
    // ReadAsync and WriteAsync, which causes a hang if both are called at once.
    // Note: Stream's 3-argument ReadAsync/WriteAsync are non-virtual, so 'new' hiding only
    // helps callers using an EchoStream-typed reference.
    public new Task WriteAsync(byte[] buffer, int offset, int count)
    {
        return Task.Run(() => Write(buffer, offset, count));
    }

    public new Task<int> ReadAsync(byte[] buffer, int offset, int count)
    {
        return Task.Run(() => Read(buffer, offset, count));
    }

    // BUG FIX: callers holding a plain Stream reference (e.g. CopyToAsync) still got the
    // deadlock-prone base implementation, because the 'new' methods above do not override
    // anything. The CancellationToken overloads ARE virtual, so override them too.
    public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
    {
        return Task.Run(() => Write(buffer, offset, count), cancellationToken);
    }

    public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
    {
        return Task.Run(() => Read(buffer, offset, count), cancellationToken);
    }

    /// <summary>
    /// Queues buffer[offset .. offset+count) for a future Read(). Writes after Close(),
    /// non-positive counts, and out-of-range offset/count combinations are silently ignored.
    /// </summary>
    /// <exception cref="TimeoutException">The queue stayed at max depth longer than WriteTimeout.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        if (m_Closed || buffer.Length - offset < count || count <= 0)
            return;

        byte[] newBuffer;
        if (!CopyBufferOnWrite && offset == 0 && count == buffer.Length)
            newBuffer = buffer; // zero-copy fast path: caller must not reuse the array afterwards
        else
        {
            newBuffer = new byte[count];
            System.Buffer.BlockCopy(buffer, offset, newBuffer, 0, count);
        }

        if (!_Buffers.TryAdd(newBuffer, WriteTimeout))
            throw new TimeoutException("EchoStream Write() Timeout");

        _Length += count;
    }

    /// <summary>
    /// Reads up to count bytes. Blocks up to ReadTimeout when no data is queued, then returns 0.
    /// After Close() and once the queue is drained, returns 0 exactly once (end-of-stream) and
    /// -1 on every read after that (author's sentinel; note the normal Stream contract is 0).
    /// </summary>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (count == 0)
            return 0;

        lock (_lock)
        {
            if (m_count == 0 && _Buffers.Count == 0)
            {
                if (m_Closed)
                {
                    if (!m_FinalZero)
                    {
                        m_FinalZero = true;
                        return 0;
                    }
                    return -1;
                }

                if (_Buffers.TryTake(out m_buffer, ReadTimeout))
                {
                    m_offset = 0;
                    m_count = m_buffer.Length;
                }
                else
                {
                    // Close() may have raced with the wait above; re-check before reporting a timeout.
                    if (m_Closed)
                    {
                        if (!m_FinalZero)
                        {
                            // BUG FIX: was 'm_finalZero' (wrong case), which does not compile.
                            m_FinalZero = true;
                            return 0;
                        }
                        return -1;
                    }
                    return 0; // timed out with no data
                }
            }

            // Drain queued chunks (without blocking) until count is satisfied or the queue is empty.
            int returnBytes = 0;
            while (count > 0)
            {
                if (m_count == 0)
                {
                    if (_Buffers.TryTake(out m_buffer, 0))
                    {
                        m_offset = 0;
                        m_count = m_buffer.Length;
                    }
                    else
                        break;
                }

                var bytesToCopy = (count < m_count) ? count : m_count;
                System.Buffer.BlockCopy(m_buffer, m_offset, buffer, offset, bytesToCopy);
                m_offset += bytesToCopy;
                m_count -= bytesToCopy;
                offset += bytesToCopy;
                count -= bytesToCopy;
                returnBytes += bytesToCopy;
            }

            _Position += returnBytes;
            return returnBytes;
        }
    }

    public override int ReadByte()
    {
        byte[] returnValue = new byte[1];
        return (Read(returnValue, 0, 1) <= 0 ? -1 : (int)returnValue[0]);
    }

    public override void Flush()
    {
        // Nothing to flush: writes are immediately visible to readers.
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotImplementedException();
    }

    public override void SetLength(long value)
    {
        throw new NotImplementedException();
    }
}
UPDATE: this works in .NET 4.8, but the behavior was changed in .NET Core and it no longer blocks the same way.
An anonymous pipe stream blocks like a file stream and should handle more edge cases than the sample code provided.
Here is a unit test that demonstrates this behavior.
// Demonstrates (on .NET 4.8) that a pending AnonymousPipeClientStream.ReadAsync is NOT
// unblocked by cancelling its CancellationToken -- only the arrival of data completes it.
// NOTE(review): this fragment belongs inside an async test method (it uses 'await' and a
// unit-test framework's Assert); it is not a standalone program.
var cts = new CancellationTokenSource();
using (var pipeServer = new AnonymousPipeServerStream(PipeDirection.Out))
using (var pipeStream = new AnonymousPipeClientStream(PipeDirection.In, pipeServer.ClientSafePipeHandle))
{
// Start a read while the pipe is empty; it should remain pending.
var buffer = new byte[1024];
var readTask = pipeStream.ReadAsync(buffer, 0, buffer.Length, cts.Token);
Assert.IsFalse(readTask.IsCompleted, "Read already complete");
// Cancelling does NOT unblock the read
cts.Cancel();
Assert.IsFalse(readTask.IsCanceled, "Read cancelled");
// Only sending data does
pipeServer.WriteByte(42);
var bytesRead = await readTask;
Assert.AreEqual(1, bytesRead);
}
Here's my take on the EchoStream posted above. It handles the offset and count parameters on Write and Read.
/// <summary>
/// Simple producer/consumer stream: Write() enqueues a copy of the requested bytes and
/// Read() blocks until data is available. Handles the offset and count parameters on
/// both Write and Read.
/// </summary>
public class EchoStream : MemoryStream
{
    private readonly ManualResetEvent _DataReady = new ManualResetEvent(false);
    private readonly ConcurrentQueue<byte[]> _Buffers = new ConcurrentQueue<byte[]>();

    // BUG FIX state: a dequeued chunk that was only partially copied out by the last Read()
    // (previously the tail of the chunk was silently dropped when count was smaller).
    private byte[] _pending;
    private int _pendingOffset;

    public bool DataAvailable { get { return !_Buffers.IsEmpty; } }

    /// <summary>Queues a copy of buffer[offset .. offset+count) and wakes any blocked reader.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        _Buffers.Enqueue(buffer.Skip(offset).Take(count).ToArray());
        _DataReady.Set();
    }

    /// <summary>
    /// Blocks until data is queued, then copies up to count bytes into buffer and returns the
    /// number of bytes actually copied. Bytes beyond count are retained for the next Read()
    /// (previously they were lost, and the chunk length was returned even when it exceeded count).
    /// Returns -1 if the wait was satisfied but no chunk could be dequeued.
    /// </summary>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (_pending == null)
        {
            _DataReady.WaitOne();

            if (!_Buffers.TryDequeue(out _pending))
            {
                _DataReady.Reset();
                return -1;
            }
            _pendingOffset = 0;
        }

        int copied = Math.Min(count, _pending.Length - _pendingOffset);
        Array.Copy(_pending, _pendingOffset, buffer, offset, copied);
        _pendingOffset += copied;

        if (_pendingOffset >= _pending.Length)
        {
            // Chunk fully consumed; only park the event when nothing else is queued.
            _pending = null;
            if (!DataAvailable)
                _DataReady.Reset();
        }

        return copied;
    }
}
I was able to use this class to unit test a System.IO.Pipelines implementation. I needed a MemoryStream that could simulate multiple read calls in succession without reaching the end of the stream.
I was trying to use all the codes from other answers, as well as the famous EchoStream, but unfortunately, they all were not working as I need:
EchoStream stream doesn't work well with non-standard read and write sizes, causing loss of data and corrupted reads.
EchoStream limits the stream by the number of writes, but not by the count, so theoretically someone can write in tons of data.
Solution:
I've created a ThroughStream, which is limited by any specified exact buffer size. The actual size might grow up to bufferSize * 2, but not larger than that.
It works perfectly with any non-standard size reads and writes, doesn't fail in multithreading, and is quite simple and optimized.
And is available on Gist! (click here)
I have a list of files, and I need to read them each in a specific order into byte[] of a given size. This in itself is not a problem for a single file, a simple while ((got = fs.Read(piece, 0, pieceLength)) > 0) gets the job done perfectly fine. The last piece of the file may be smaller than desired, which is fine.
Now, there is a tricky bit: If I have multiple files, I need to have one continuous stream, which means that if the last piece of a file is smaller than pieceLength, then I need to read (pieceLength-got) of the next file, and then keep going until the end of the last file.
So essentially, given X files, I will always read pieces that are exactly pieceLength long, except for the very last piece of the very last file, which may be smaller.
I just wonder if there is already something build in .net (3.5 SP1) that does the trick. My current approach is to create a Class that takes a list of files and then exposes a Read(byte[] buffer, long index, long length) function, similar to FileStream.Read(). This should be pretty straight forward because I do not have to change my calling code that reads the data, but before I reinvent the wheel I'd just like to double check that the wheel is not already built into the BCL.
Thanks :)
I don't believe there's anything in the framework, but I'd suggest making it a bit more flexible - take an IEnumerable<Stream> in your constructor, and derive from Stream yourself. Then to get file streams you can (assuming C# 3.0) just do:
Stream combined = new CombinationStream(files.Select(file => File.Open(file)));
The "ownership" part is slightly tricky here - the above would allow the combination stream to take ownership of any stream it reads from, but you may not want it to have to iterate through all the rest of the streams and close them all if it's closed prematurely.
Here is what I came up with, based on Jon Skeet's idea.
It just implements Read, which was quite sufficient for me (but now I need help implementing the BeginRead/EndRead methods). Here is the full code containing both sync and async - Read and BeginRead/EndRead
https://github.com/facebook-csharp-sdk/combination-stream/blob/master/src/CombinationStream-Net20/CombinationStream.cs
/// <summary>
/// Read-only, forward-only stream that concatenates a list of underlying streams,
/// presenting them as one continuous stream.
/// </summary>
internal class CombinationStream : System.IO.Stream
{
    private readonly System.Collections.Generic.IList<System.IO.Stream> _streams;
    private int _currentStreamIndex;
    private System.IO.Stream _currentStream; // null when the stream list is empty
    private long _length = -1;               // -1 = not yet computed; summed lazily in Length
    private long _position;

    /// <exception cref="System.ArgumentNullException">streams is null.</exception>
    public CombinationStream(System.Collections.Generic.IList<System.IO.Stream> streams)
    {
        if (streams == null)
        {
            throw new System.ArgumentNullException("streams");
        }

        _streams = streams;
        if (streams.Count > 0)
        {
            _currentStream = streams[_currentStreamIndex++];
        }
    }

    public override void Flush()
    {
        if (_currentStream != null)
        {
            _currentStream.Flush();
        }
    }

    public override long Seek(long offset, System.IO.SeekOrigin origin)
    {
        throw new System.InvalidOperationException("Stream is not seekable.");
    }

    public override void SetLength(long value)
    {
        // BUG FIX: previously this silently overwrote the lazily-computed cached length,
        // corrupting subsequent Length reads. The stream is not writable, so refuse.
        throw new System.NotSupportedException("Stream is not writable.");
    }

    /// <summary>
    /// Reads up to count bytes, transparently advancing to the next underlying stream
    /// whenever the current one is exhausted. Returns the number of bytes read; 0 at
    /// end of the last stream (or when constructed with an empty list).
    /// </summary>
    public override int Read(byte[] buffer, int offset, int count)
    {
        // BUG FIX: an empty stream list used to leave _currentStream null and
        // throw NullReferenceException here; report end-of-stream instead.
        if (_currentStream == null)
        {
            return 0;
        }

        int result = 0;
        while (count > 0)
        {
            int bytesRead = _currentStream.Read(buffer, offset, count);
            result += bytesRead;
            offset += bytesRead;
            _position += bytesRead;
            count -= bytesRead; // bytesRead <= count always holds, so the old guard was redundant

            if (count > 0)
            {
                if (_currentStreamIndex >= _streams.Count)
                {
                    break; // last stream exhausted
                }
                _currentStream = _streams[_currentStreamIndex++];
            }
        }

        return result;
    }

    /// <summary>Sum of the lengths of all underlying streams, computed on first access.</summary>
    public override long Length
    {
        get
        {
            if (_length == -1)
            {
                _length = 0;
                foreach (var stream in _streams)
                {
                    _length += stream.Length;
                }
            }
            return _length;
        }
    }

    public override long Position
    {
        get { return this._position; }
        set { throw new System.NotImplementedException(); }
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new System.InvalidOperationException("Stream is not writable");
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override bool CanWrite
    {
        get { return false; }
    }
}
Also available as a NuGet package
Install-Package CombinationStream