While working with relatively large text files, I noticed something strange: asynchronous reading and writing is actually slower than the synchronous equivalent.
E.g., executing this dummy code:
// Start both asynchronous writes before awaiting either of them.
var res1 = File.WriteAllLinesAsync(string.Format(@"C:\Projects\DelMee\file{0}.txt", i), lines);
var res2 = File.WriteAllLinesAsync(string.Format(@"C:\Projects\DelMee\file{0}_bck.txt", i), lines);
// Wait for the first write, then for the second.
await res1;
await res2;
is actually much slower than
// Synchronous counterpart: the second call is made only after the first one returns.
File.WriteAllLines(string.Format(@"C:\Projects\DelMee\file{0}.txt", i), lines);
File.WriteAllLines(string.Format(@"C:\Projects\DelMee\file{0}_bck.txt", i), lines);
In theory, the first approach should be faster, because the second write should be started before the first one has completed. The performance difference is about 100% for 15–25 MB files (10 vs. 20 seconds).
I noticed the same behavior for ReadAllLines and ReadAllLinesAsync.
Update 0: The main idea is to have all files fully processed by the time each of the TestFileWriteXXX functions completes. Therefore:
Task.WhenAll(allTasks1); // Calling this without await is not a valid option: the function would complete before all files are processed
Update 1: I added reading and writing using threads, and it showed a 50% improvement. Here is the complete example:
Update:2 I updated the code in order to eliminate the buffer generation overhead
// Number of write/read rounds performed by each benchmark variant.
const int MaxAttempts = 5;

/// <summary>
/// Entry point: launches the three benchmark variants in order, then blocks on console input.
/// </summary>
/// <remarks>
/// TestFileWriteAsync is not awaited here, so control can reach Console.ReadLine
/// while that variant is still running; the blocking read keeps the process alive
/// until its timing line has been printed.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Blocking file APIs, one round after another on the calling thread.
    TestFileWrite();

    // Blocking file APIs, one dedicated thread per round.
    TestFileWriteViaThread();

    // Asynchronous file APIs.
    TestFileWriteAsync();

    // Keep the console window (and the process) open.
    _ = Console.ReadLine();
}
/// <summary>
/// Baseline benchmark: runs TestFileWriteInt MaxAttempts times, one call after
/// another on the calling thread, and prints the total elapsed time.
/// </summary>
private static void TestFileWrite()
{
    // Remove files left over from earlier runs before the clock starts.
    Clear();

    Stopwatch timer = Stopwatch.StartNew();
    Console.WriteLine("Begin TestFileWrite");

    int round = 0;
    while (round < MaxAttempts)
    {
        TestFileWriteInt(round);
        ++round;
    }

    // Rendered as hh:mm:ss.cc, where cc is hundredths of a second.
    TimeSpan elapsed = timer.Elapsed;
    string formatted = String.Format(
        "{0:00}:{1:00}:{2:00}.{3:00}",
        elapsed.Hours,
        elapsed.Minutes,
        elapsed.Seconds,
        elapsed.Milliseconds / 10);
    Console.WriteLine("TestFileWrite took: " + formatted);
}
/// <summary>
/// Threaded benchmark: starts one thread per round, each running TestFileWriteInt
/// with its round index, waits for all of them, and prints the total elapsed time.
/// </summary>
private static void TestFileWriteViaThread()
{
    // Remove files left over from earlier runs before the clock starts.
    Clear();

    Stopwatch timer = Stopwatch.StartNew();
    Console.WriteLine("Begin TestFileWriteViaThread");

    var workers = new List<Thread>();
    for (int round = 0; round < MaxAttempts; ++round)
    {
        // The round index is handed to the thread as its start argument.
        var worker = new Thread(TestFileWriteInt);
        worker.Start(round);
        workers.Add(worker);
    }

    // Block until every worker has finished, in the order they were started.
    foreach (Thread started in workers)
    {
        started.Join();
    }

    // Rendered as hh:mm:ss.cc, where cc is hundredths of a second.
    TimeSpan elapsed = timer.Elapsed;
    string formatted = String.Format(
        "{0:00}:{1:00}:{2:00}.{3:00}",
        elapsed.Hours,
        elapsed.Minutes,
        elapsed.Seconds,
        elapsed.Milliseconds / 10);
    Console.WriteLine("TestFileWriteViaThread took: " + formatted);
}
/// <summary>
/// One benchmark round using the blocking file APIs: writes the generated lines
/// to the primary file and to the backup file, then reads the primary file twice.
/// </summary>
/// <param name="oIndex">
/// Boxed int round index; typed as object so the method can be used as a thread start routine.
/// </param>
private static void TestFileWriteInt(object oIndex)
{
    int index = (int)oIndex;
    string primaryPath = string.Format(@"C:\Projects\DelMee\file{0}.txt", index);
    string backupPath = string.Format(@"F:\Projects\DelMee\file{0}_bck.txt", index);

    List<string> lines = GenerateLines(index);
    File.WriteAllLines(primaryPath, lines);
    File.WriteAllLines(backupPath, lines);

    // NOTE(review): both reads target the primary file; the backup is never read back — confirm this is intended.
    string[] firstRead = File.ReadAllLines(primaryPath);
    string[] secondRead = File.ReadAllLines(primaryPath);
    //File.WriteAllLines(string.Format(@"C:\Projects\DelMee\file_test{0}.txt", index), secondRead);
}
/// <summary>
/// Async benchmark: for each round, writes the generated lines to the primary and
/// backup files concurrently, then issues two concurrent reads of the primary file,
/// and finally prints the total elapsed time.
/// </summary>
/// <returns>
/// A task that completes once every round has finished and the timing line has been
/// printed. Returning a Task (rather than using <c>async void</c>) lets callers wait
/// for completion and observe I/O exceptions; a caller that discards the task will
/// not see those exceptions.
/// </returns>
private static async Task TestFileWriteAsync()
{
    // Remove files left over from earlier runs before the clock starts.
    Clear();
    Console.WriteLine("Begin TestFileWriteAsync ");
    Stopwatch stopWatch = Stopwatch.StartNew();

    for (int i = 0; i < MaxAttempts; ++i)
    {
        List<string> lines = GenerateLines(i);
        string primaryPath = string.Format(@"C:\Projects\DelMee\file{0}.txt", i);
        string backupPath = string.Format(@"F:\Projects\DelMee\file{0}_bck.txt", i);

        // Start both writes before awaiting so they can overlap.
        await Task.WhenAll(
            File.WriteAllLinesAsync(primaryPath, lines),
            File.WriteAllLinesAsync(backupPath, lines));

        // Both reads target the primary file, mirroring TestFileWriteInt.
        string[][] contents = await Task.WhenAll(
            File.ReadAllLinesAsync(primaryPath),
            File.ReadAllLinesAsync(primaryPath));
        // await File.WriteAllLinesAsync(string.Format(@"C:\Projects\DelMee\file_test{0}.txt", i), contents[0]);
    }

    stopWatch.Stop();

    // Rendered as hh:mm:ss.cc, where cc is hundredths of a second.
    TimeSpan ts = stopWatch.Elapsed;
    string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds / 10);
    Console.WriteLine("TestFileWriteAsync took: " + elapsedTime);
}
/// <summary>
/// Deletes the primary and backup benchmark files for indices 0 through 14.
/// </summary>
/// <remarks>
/// The upper bound is the literal 15, independent of MaxAttempts.
/// </remarks>
private static void Clear()
{
    int slot = 0;
    while (slot < 15)
    {
        string primaryPath = string.Format(@"C:\Projects\DelMee\file{0}.txt", slot);
        string backupPath = string.Format(@"F:\Projects\DelMee\file{0}_bck.txt", slot);

        File.Delete(primaryPath);
        File.Delete(backupPath);

        ++slot;
    }
}
// Payload shared by every round: 25 * 1024 * 1024 'a' characters, allocated once
// so that generating it is not part of the measured time.
static string buffer = new string('a', 25 * 1024 * 1024);

/// <summary>
/// Builds the content to write for one round: a new list holding the shared buffer as its only line.
/// </summary>
/// <param name="i">Round index; not used by the current implementation.</param>
/// <returns>A new single-element list whose element is the shared buffer string.</returns>
private static List<string> GenerateLines(int i)
{
    var lines = new List<string>();
    lines.Add(buffer);
    return lines;
}
And the results are:
TestFileWrite took: 00:00:03.50
TestFileWriteViaThread took: 00:00:01.63
TestFileWriteAsync took: 00:00:06.78
8-core CPU; C: and F: are two different SSDs (Samsung 850 EVO) on two different SATA ports.
Update 3 – Conclusion: It looks like File.WriteAllLinesAsync does not handle well the scenario where we want to flush a large amount of data. As was pointed out in the answers below, it is better to use FileStream directly. But async is still slower than sequential access.
For now, the fastest approach is still to use multithreading.