I have a zip file which may contain files of the type I want, but I don't know yet. When I open these files (when unzipped) in notepad, the first 8 characters are always the same. Is there a way to find out whether the first 8 characters match without unzipping the whole (potentially several gb in size) file?
The files are often over 4gb, and have might have been compressed using deflate64. This means I can't use SharpZipLib or DotNetZip - I have tried both and had them fail on the same file.
I've been trying to use sevenzipsharp in the following way, but it only gave me zeros at the start of every file. Also e.Cancel did not cancel, so it ended up unzipping the whole thing into mstream anyway, which I would like to avoid.
SevenZipExtractor extractor = new SevenZipExtractor(zipfilename);
foreach (ArchiveFileInfo info in extractor.ArchiveFileData)
{
bool isMyFileType = false;
MemoryStream mstream = new MemoryStream();
extractor.Extracting += (object sender, ProgressEventArgs e) =>
{
if (e.PercentDone * info.Size / 100 > 32)
{
// read the first 32 bytes
byte[] buffer = new byte[32];
if (mstream.Length >= 32)
mstream.Read(buffer, 0, 32);
else
mstream.Read(buffer, 0, (int)mstream.Length);
//bung the buffer into a streamreader
MemoryStream memstream = new MemoryStream(buffer);
StreamReader file = new StreamReader(memstream);
//read the stream
string filestart = "";
for (int i = 0; i < 8; i++)
{
if (!file.EndOfStream)
{
filestart = filestart + ((char)file.Read()).ToString();
}
}
isMyFileType = (filestart == "My8chars");
e.Cancel = true;
}
};
await CheckForMyFileType(info, mstream, extractor);
if (isMyFileType)
{
//do stuff if it's the right file type
private Task CheckForMyFileType(ArchiveFileInfo info, MemoryStream mstream, SevenZipExtractor extractor)
{
TaskCompletionSource<bool> tcs = new TaskCompletionSource<bool>();
extractor.ExtractFile(info.FileName, mstream);
tcs.SetResult(true);
return tcs.Task;
}