I know this is a PowerShell question, but you can make good use of parallelization in C#. You also mentioned in one of the comments that using C# would be an acceptable alternative, so I thought it wouldn't hurt to post a possible implementation of how it could be done.
You could first create a method to calculate the MD5 Checksum for a file:
/// <summary>
/// Computes the MD5 digest of a file's contents.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>The digest as a lowercase hexadecimal string with no separators.</returns>
private static string CalculateMD5(string filename)
{
    using (var hasher = MD5.Create())
    using (var input = File.OpenRead(filename))
    {
        byte[] digest = hasher.ComputeHash(input);

        // Two lowercase hex digits per byte, joined without any separator.
        return string.Concat(Array.ConvertAll(digest, b => b.ToString("x2")));
    }
}
Then you could write a method that queries all file hashes in parallel using ParallelEnumerable.AsParallel():
/// <summary>
/// Builds a parallel query that pairs every file under a directory tree with its MD5 hash.
/// </summary>
/// <param name="directoryPath">Root directory; all subdirectories are searched as well.</param>
/// <returns>
/// One <see cref="FileHash"/> per file found. The query does not request ordering
/// (there is no AsOrdered call), so callers should not rely on the order of results.
/// </returns>
private static IEnumerable<FileHash> FindFileHashes(string directoryPath)
{
    var candidates = Directory.EnumerateFiles(directoryPath, "*", SearchOption.AllDirectories);

    return from path in candidates.AsParallel()
           select new FileHash { FileName = path, Hash = CalculateMD5(path) };
}
Then you can simply use the above method to delete duplicate files:
/// <summary>
/// Walks every file under a directory tree and deletes each file whose MD5 hash
/// has already been seen on an earlier file, logging every decision to the console.
/// </summary>
/// <param name="directoryPath">Root directory to de-duplicate.</param>
private static void DeleteDuplicateFiles(string directoryPath)
{
    var seenHashes = new HashSet<string>();

    foreach (var entry in FindFileHashes(directoryPath))
    {
        // HashSet.Add returns false when the hash is already present,
        // which means this file duplicates one encountered earlier.
        if (seenHashes.Add(entry.Hash))
        {
            Console.WriteLine($"Found - File : {entry.FileName} Hash : {entry.Hash}");
        }
        else
        {
            Console.WriteLine($"Deleting - File : {entry.FileName} Hash : {entry.Hash}");
            File.Delete(entry.FileName);
        }
    }
}
Full Program:
using System;
using System.Collections.Generic;
using System.Linq;
using System.IO;
using System.Security.Cryptography;
namespace Test
{
    /// <summary>
    /// Pairs a file path with the hash computed from that file's contents.
    /// </summary>
    internal class FileHash
    {
        /// <summary>Path of the file that was hashed.</summary>
        public string FileName { get; set; }

        /// <summary>Lowercase hexadecimal MD5 digest of the file's contents.</summary>
        public string Hash { get; set; }
    }

    /// <summary>
    /// Console program that removes duplicate files (by MD5 hash) from a directory tree.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Entry point: de-duplicates the hard-coded directory if it exists.
        /// </summary>
        public static void Main()
        {
            // Verbatim string: "\T" and "\F" are not valid escape sequences in a regular literal.
            var path = @"C:\Path\To\Files";

            // The path names a directory, so Directory.Exists is the right check;
            // File.Exists returns false for directories.
            if (!Directory.Exists(path))
            {
                Console.Error.WriteLine($"Directory not found: {path}");
                return;
            }

            Console.WriteLine($"Deleting duplicate files at {path}");
            DeleteDuplicateFiles(path);
        }

        /// <summary>
        /// Deletes every file under <paramref name="directoryPath"/> whose MD5 hash matches
        /// that of a file seen earlier in the enumeration, logging each decision.
        /// </summary>
        /// <param name="directoryPath">Root directory to de-duplicate (searched recursively).</param>
        private static void DeleteDuplicateFiles(string directoryPath)
        {
            var fileHashes = new HashSet<string>();
            foreach (var fileHash in FindFileHashes(directoryPath))
            {
                // Add returns true only for a hash not seen before; a single lookup
                // replaces the Contains-then-Add pair.
                if (fileHashes.Add(fileHash.Hash))
                {
                    Console.WriteLine($"Found - File : {fileHash.FileName} Hash : {fileHash.Hash}");
                    continue;
                }

                Console.WriteLine($"Deleting - File : {fileHash.FileName} Hash : {fileHash.Hash}");
                File.Delete(fileHash.FileName);
            }
        }

        /// <summary>
        /// Builds a parallel query that hashes every file under a directory tree.
        /// </summary>
        /// <param name="directoryPath">Root directory; all subdirectories are included.</param>
        /// <returns>
        /// One <see cref="FileHash"/> per file. The query does not request ordering
        /// (there is no AsOrdered call), so the order of results should not be relied upon.
        /// </returns>
        private static IEnumerable<FileHash> FindFileHashes(string directoryPath)
        {
            var allFiles = Directory
                .EnumerateFiles(directoryPath, "*", SearchOption.AllDirectories);

            var hashedFiles = allFiles
                .AsParallel()
                .Select(filename => new FileHash
                {
                    FileName = filename,
                    Hash = CalculateMD5(filename)
                });

            return hashedFiles;
        }

        /// <summary>
        /// Computes the MD5 digest of a file's contents.
        /// </summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <returns>The digest as a lowercase hexadecimal string with no separators.</returns>
        private static string CalculateMD5(string filename)
        {
            using var md5 = MD5.Create();
            using var stream = File.OpenRead(filename);
            var hash = md5.ComputeHash(stream);

            // BitConverter yields "AB-CD-..."; strip the dashes and lowercase the result.
            return BitConverter.ToString(hash).Replace("-", string.Empty).ToLowerInvariant();
        }
    }
}