Task
I have a huge file (≈ 20 GB) containing integers and want to read them in C#.
Simple approach
Reading the file into memory (into a byte array) is quite fast (I'm using an SSD, and the whole file fits into memory). But when I then read those bytes with a BinaryReader (wrapping a MemoryStream), the ReadInt32 calls take significantly longer than reading the file into memory did. I expected disk I/O to be the bottleneck, but it's the conversion!
Idea and question
Is there a way to cast the whole byte array directly into an int array, without converting each value one by one with ReadInt32? (See the sketch after the timings below for the kind of thing I have in mind.)
using System;
using System.Diagnostics;
using System.IO;
using System.Text;

class Program
{
    static int size = 256 * 1024 * 1024;
    static string filename = @"E:\testfile";

    static void Main(string[] args)
    {
        Write(filename, size);
        int[] result = Read(filename, size);
        Console.WriteLine(result.Length);
    }

    // Write 'size' sequential ints (here 1 GB of data) with BinaryWriter.
    static void Write(string filename, int size)
    {
        Stopwatch stopwatch = new Stopwatch();
        stopwatch.Start();
        BinaryWriter bw = new BinaryWriter(new FileStream(filename, FileMode.Create), Encoding.UTF8);
        for (int i = 0; i < size; i++)
        {
            bw.Write(i);
        }
        bw.Close();
        stopwatch.Stop();
        Console.WriteLine(String.Format("File written in {0}ms", stopwatch.ElapsedMilliseconds));
    }

    static int[] Read(string filename, int size)
    {
        Stopwatch stopwatch = new Stopwatch();
        stopwatch.Start();
        // Phase 1: read the whole file into memory in one go.
        byte[] buffer = File.ReadAllBytes(filename);
        BinaryReader br = new BinaryReader(new MemoryStream(buffer), Encoding.UTF8);
        stopwatch.Stop();
        Console.WriteLine(String.Format("File read into memory in {0}ms", stopwatch.ElapsedMilliseconds));

        stopwatch.Reset();
        stopwatch.Start();
        // Phase 2: convert the bytes to ints one at a time with ReadInt32.
        int[] result = new int[size];
        for (int i = 0; i < size; i++)
        {
            result[i] = br.ReadInt32();
        }
        br.Close();
        stopwatch.Stop();
        Console.WriteLine(String.Format("Byte array cast to int array in {0}ms", stopwatch.ElapsedMilliseconds));
        return result;
    }
}
- File written in 5499ms
- File read into memory in 455ms
- Byte array cast to int array in 3382ms
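For reference, here is a sketch of the kind of bulk conversion I'm hoping exists. I haven't verified that this is the right approach, but Buffer.BlockCopy is documented to copy a given number of bytes between arrays of primitive types, which would avoid the per-element ReadInt32 calls entirely. Note the assumption that the file's byte order matches the machine's (little-endian on x86/x64). The method uses the same usings as the program above and could be dropped into the Program class:

static int[] ReadBulk(string filename)
{
    // Load the raw bytes, then block-copy them into an int[] in one call.
    byte[] buffer = File.ReadAllBytes(filename);
    int[] result = new int[buffer.Length / sizeof(int)];
    // The last argument is a count in bytes, not elements; no per-element
    // conversion happens here, just a raw memory copy.
    Buffer.BlockCopy(buffer, 0, result, 0, buffer.Length);
    return result;
}

Is this (or something like it) the intended way to do it, and is it actually faster than the ReadInt32 loop?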