我正在寻找MemoryStream的实现,它不会将内存分配为一个大块,而是一个块的集合。我想在内存(64位)中存储几GB的数据,并避免内存碎片的限制。
发布于 2009-07-29 21:53:22
您需要首先确定虚拟地址碎片是否是问题所在。
如果你使用的是一台64位的机器(你似乎表明你是),我非常怀疑它是不是。每个64位进程几乎拥有整个64位虚拟内存空间,您唯一担心的是虚拟地址空间碎片,而不是物理内存碎片(这是操作系统必须担心的问题)。操作系统内存管理器已经在幕后对内存进行了分页。在可预见的将来,您不会在耗尽物理内存之前耗尽虚拟地址空间。在我们都退休之前,这不太可能发生变化。
如果你只有32位地址空间,那么在分配GB量级的连续大块内存时,你很快就会遇到碎片问题。CLR中没有现成的(内置的)分块分配内存流。ASP.NET内部有一个(出于其他原因),但它无法从外部访问。如果您必须走这条路,那么您最好自己编写一个,因为您的应用程序的使用模式不太可能与许多其他应用程序相似,并且设法把数据塞进32位地址空间很可能才是您的性能瓶颈。
我强烈建议,如果您正在处理GB级的数据,则需要64位进程。对于32位地址空间碎片,它将比手工解决方案做得更好,不管你有多聪明。
发布于 2009-07-29 22:31:22
如下所示:
/// <summary>
/// A readable, writable and seekable in-memory stream that stores its data as a
/// list of independently allocated byte arrays ("chunks") instead of one
/// contiguous buffer. Every write that extends the stream appends one new chunk
/// sized to the appended data; writes inside the existing data overwrite in place.
/// </summary>
class ChunkedMemoryStream : Stream
{
    // Chunks in stream order. Write only appends non-empty arrays.
    private readonly List<byte[]> _chunks = new List<byte[]>();

    // (_positionChunk, _positionOffset) is the chunk-relative form of _position.
    // At the end of the stream _positionChunk == _chunks.Count and _positionOffset == 0.
    private int _positionChunk;
    private int _positionOffset;
    private long _position;

    // Total number of bytes stored. Tracked as a long so multi-gigabyte streams
    // do not overflow (summing the int chunk lengths with LINQ would).
    private long _length;

    /// <summary>Always <c>true</c>.</summary>
    public override bool CanRead
    {
        get { return true; }
    }

    /// <summary>Always <c>true</c>.</summary>
    public override bool CanSeek
    {
        get { return true; }
    }

    /// <summary>Always <c>true</c>.</summary>
    public override bool CanWrite
    {
        get { return true; }
    }

    /// <summary>No-op: all data already lives in memory.</summary>
    public override void Flush() { }

    /// <summary>Total number of bytes held across all chunks.</summary>
    public override long Length
    {
        get { return _length; }
    }

    /// <summary>
    /// Gets or sets the absolute position within the stream.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The value is negative.</exception>
    /// <exception cref="OverflowException">The value is beyond the end of the stream.</exception>
    public override long Position
    {
        get
        {
            return _position;
        }
        set
        {
            if (value < 0)
                throw new ArgumentOutOfRangeException("value", "Position must not be negative.");

            // Walk the chunk list, converting the absolute position into a
            // (chunk index, offset inside chunk) pair. Work on locals so the
            // stream state is untouched if the position turns out to be invalid.
            int chunk = 0;
            long remaining = value;
            while (remaining != 0)
            {
                if (chunk >= _chunks.Count)
                    throw new OverflowException();
                if (remaining < _chunks[chunk].Length)
                    break;
                remaining -= _chunks[chunk].Length;
                chunk++;
            }

            _position = value;
            _positionChunk = chunk;
            _positionOffset = (int)remaining; // < chunk length, so it fits in an int
        }
    }

    /// <summary>
    /// Copies up to <paramref name="count"/> bytes from the current position into
    /// <paramref name="buffer"/> starting at <paramref name="offset"/>, and advances
    /// the position by the number of bytes copied.
    /// </summary>
    /// <returns>The number of bytes copied; 0 when positioned at the end of the stream.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        CheckBufferArguments(buffer, offset, count);

        int result = 0;
        while ((count != 0) && (_positionChunk != _chunks.Count))
        {
            byte[] chunk = _chunks[_positionChunk];
            int fromChunk = Math.Min(count, chunk.Length - _positionOffset);
            Array.Copy(chunk, _positionOffset, buffer, offset, fromChunk);
            offset += fromChunk;
            count -= fromChunk;
            result += fromChunk;
            Advance(chunk, fromChunk);
        }
        return result;
    }

    /// <summary>
    /// Moves the position relative to <paramref name="origin"/>. The target is
    /// clamped to the range [0, Length].
    /// </summary>
    /// <param name="offset">Signed displacement; use a negative value with <see cref="SeekOrigin.End"/> to move backwards from the end.</param>
    /// <param name="origin">Reference point for <paramref name="offset"/>.</param>
    /// <returns>The new (clamped) absolute position.</returns>
    /// <exception cref="ArgumentException"><paramref name="origin"/> is not a valid <see cref="SeekOrigin"/>.</exception>
    public override long Seek(long offset, SeekOrigin origin)
    {
        long basePos;
        switch (origin)
        {
            case SeekOrigin.Begin:
                basePos = 0;
                break;
            case SeekOrigin.Current:
                basePos = _position;
                break;
            case SeekOrigin.End:
                basePos = _length;
                break;
            default:
                throw new ArgumentException("Invalid seek origin.", "origin");
        }

        // basePos is never negative, so only the positive direction can overflow.
        long target = (offset > 0 && basePos > long.MaxValue - offset)
            ? long.MaxValue
            : basePos + offset;

        if (target < 0)
            target = 0;
        else if (target > _length)
            target = _length;

        Position = target;
        return target;
    }

    /// <summary>Not implemented; the stream only grows through <see cref="Write"/>.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public override void SetLength(long value)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Writes <paramref name="count"/> bytes from <paramref name="buffer"/> starting
    /// at <paramref name="offset"/>. Bytes that fall inside the existing data
    /// overwrite it in place; any bytes past the current end are stored in one
    /// newly allocated chunk.
    /// </summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        CheckBufferArguments(buffer, offset, count);

        // Overwrite existing data first.
        while ((count != 0) && (_positionChunk != _chunks.Count))
        {
            byte[] chunk = _chunks[_positionChunk];
            int toChunk = Math.Min(count, chunk.Length - _positionOffset);
            Array.Copy(buffer, offset, chunk, _positionOffset, toChunk);
            offset += toChunk;
            count -= toChunk;
            Advance(chunk, toChunk);
        }

        // Whatever is left extends the stream with a single new chunk.
        if (count != 0)
        {
            byte[] appended = new byte[count];
            Array.Copy(buffer, offset, appended, 0, count);
            _chunks.Add(appended);
            _positionChunk = _chunks.Count;
            _positionOffset = 0;
            _position += count;
            _length += count;
        }
    }

    // Moves the position forward by `bytes` within `chunk`, stepping to the next
    // chunk only once the current one has been fully consumed.
    private void Advance(byte[] chunk, int bytes)
    {
        _position += bytes;
        _positionOffset += bytes;
        if (_positionOffset == chunk.Length)
        {
            _positionOffset = 0;
            _positionChunk++;
        }
    }

    // Validates the (buffer, offset, count) triple shared by Read and Write.
    private static void CheckBufferArguments(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
            throw new ArgumentNullException("buffer");
        if (offset < 0)
            throw new ArgumentOutOfRangeException("offset", "Offset must not be negative.");
        if (count < 0)
            throw new ArgumentOutOfRangeException("count", "Count must not be negative.");
        if (buffer.Length - offset < count)
            throw new ArgumentException("Offset and count exceed the bounds of the buffer.");
    }
}
class Program
{
/// <summary>
/// Smoke test for ChunkedMemoryStream: appends data in several writes, reads it
/// back, overwrites the beginning, appends again, and checks Length, Position
/// and content after each step with Debug.Assert.
/// </summary>
static void Main(string[] args)
{
    ChunkedMemoryStream cms = new ChunkedMemoryStream();

    // A fresh stream is empty and positioned at the start.
    Debug.Assert(cms.Length == 0);
    Debug.Assert(cms.Position == 0);
    cms.Position = 0;

    // Append "hello world" in three separate writes (three chunks).
    byte[] helloworld = Encoding.UTF8.GetBytes("hello world");
    cms.Write(helloworld, 0, 3);
    cms.Write(helloworld, 3, 3);
    cms.Write(helloworld, 6, 5);
    Debug.Assert(cms.Length == 11);
    Debug.Assert(cms.Position == 11);

    // Read everything back into the middle of a larger buffer.
    cms.Position = 0;
    byte[] b = new byte[20];
    int read = cms.Read(b, 3, (int)cms.Length);
    Debug.Assert(read == 11);
    Debug.Assert(b.Skip(3).Take(11).SequenceEqual(helloworld));

    // Overwrite the first five bytes; the length must not change.
    cms.Position = 0;
    cms.Write(Encoding.UTF8.GetBytes("seeya"), 0, 5);
    Debug.Assert(cms.Length == 11);
    Debug.Assert(cms.Position == 5);

    // Length is a long; narrow it to int (not byte) for the count argument.
    cms.Position = 0;
    read = cms.Read(b, 0, (int)cms.Length);
    Debug.Assert(read == 11);
    Debug.Assert(b.Take(11).SequenceEqual(Encoding.UTF8.GetBytes("seeya world")));
    Debug.Assert(cms.Length == 11);
    Debug.Assert(cms.Position == 11);

    // Writing at the end of the stream extends it.
    cms.Write(Encoding.UTF8.GetBytes(" again"), 0, 6);
    Debug.Assert(cms.Length == 17);
    Debug.Assert(cms.Position == 17);

    cms.Position = 0;
    read = cms.Read(b, 0, (int)cms.Length);
    Debug.Assert(read == 17);
    Debug.Assert(b.Take(17).SequenceEqual(Encoding.UTF8.GetBytes("seeya world again")));
}
}
发布于 2015-08-07 00:16:05
必应(Bing)团队已经发布了RecyclableMemoryStream,并在此处(here)撰文介绍了它。他们列举的好处是:
performance
<>G215>
https://stackoverflow.com/questions/1203121
复制相似问题