I want to check if file and its archive version is the same. I created something like this:
public static class FileUtils
{
public static bool SameAsArchive(this FileInfo file, string archivedFile)
{
using (var ms = new MemoryStream())
{
GZip.Decompress(File.OpenRead(archivedFile), ms, true);
return File.ReadAllBytes(file.FullName).SequenceEqual(ms.ToArray());
}
}
}
Is there any faster way of checking that instead of reading all bytes?
Edit
Thanks to @Stig I've created a new version:
public static bool SameAsArchive(this FileInfo file, string archive)
{
var bytesToRead = 4096;
var one = new byte[bytesToRead];
var two = new byte[bytesToRead];
using (var gs = new GZipStream(File.OpenRead(archive), CompressionMode.Decompress))
using (var fs = File.OpenRead(file.FullName))
{
int file1byte;
int file2byte;
do
{
file1byte = fs.Read(one);
file2byte = gs.Read(two);
}
while (one.SequenceEqual(two) && (file1byte != 0));
return file1byte == file2byte && file1byte == 0;
}
}
But it seems not work properly. For some reason, sometimes I do not read full 4096 bytes from GZipStream
:
// This is log how many bytes are read in each `do while` loop iteration
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 770
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 665
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 4096
read bytes from fs: 4096, read bytes from gs: 853
read bytes from fs: 4096, read bytes from gs: 4096
I noticed, that the problem exists only using .NET6.
With .net core 3.1
this example works properly:
static string GenerateContent()
{
var rnd = new Random();
var sb = new StringBuilder();
for (int i = 0; i < 10000000; i++)
{
sb.Append(rnd.Next(0, 100));
}
return sb.ToString();
}
static void Compress(string input, string output)
{
using (var originalFileStream = File.OpenRead(input))
using (var compressedFileStream = File.OpenWrite(output))
using (var compressor = new GZipStream(compressedFileStream, CompressionMode.Compress))
originalFileStream.CopyTo(compressor);
}
static bool AreFilesEqual(string input, string gzip)
{
var bytesToRead = 4096;
var one = new byte[bytesToRead];
var two = new byte[bytesToRead];
using (var gs = new GZipStream(File.OpenRead(gzip), CompressionMode.Decompress))
using (var fs = File.OpenRead(input))
{
int file1byte;
int file2byte;
do
{
file1byte = fs.Read(one);
file2byte = gs.Read(two);
}
while (one.SequenceEqual(two) && (file1byte != 0));
return file1byte == file2byte && file1byte == 0;
}
}
static void Main(string[] args)
{
var input = @"c:\logs\input3.txt";
var output = @"c:\logs\example3.gz";
// create input
File.WriteAllText(input, GenerateContent());
// compress input
Compress(input, output);
// compare files
var areFilesEqual = AreFilesEqual(input, output);
// .NET 6.0 -> files aren't equal
// .NET core 3.1 -> files are equal
}
Seems like Read
does not always return requested amount of bytes. I created simple extension that forces missing bytes to be read:
public static class Extensions
{
public static int ForceRead(this Stream fs, Span<byte> buffer)
{
var totalReadBytes = 0;
do
{
var readBytes = fs.Read(buffer, totalReadBytes, buffer.Length - totalReadBytes);
if (readBytes == 0)
return totalReadBytes;
totalReadBytes += readBytes;
}
while (totalReadBytes < buffer.Length);
return totalReadBytes;
}
}