Been using the following console application:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Configuration;
namespace ConsoleApp1
{
class Program
{
static StringBuilder sBuilder = new StringBuilder();
static StreamWriter file;
static void Main(string[] args)
{
try
{
using (file = new StreamWriter(ConfigurationManager.AppSettings["outFile"], true))
{
ProcessDirectory(ConfigurationManager.AppSettings["inDir"]);
}
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
File.WriteAllText(ConfigurationManager.AppSettings["logFile"], ex.Message);
throw;
}
}
public static void ProcessDirectory(string targetDirectory)
{
string[] fileEntries = Directory.GetFiles(targetDirectory);
foreach (string fileName in fileEntries)
ProcessFile(fileName);
string[] subdirectoryEntries = Directory.GetDirectories(targetDirectory);
foreach (string subdirectory in subdirectoryEntries)
ProcessDirectory(subdirectory);
}
public static void ProcessFile(string path)
{
var lines = File.ReadAllLines(path);
var filtered = lines
.Where(x => x[0] != '#')
.Select(line => line.Split(' '))
.Where(fields =>
fields[8] != '-' // and other filtering
)
.Select(f => string.Join(" ", new string[] {
f[0],
f[8].ToLower().Replace("some_value",""),
((some_contextual_condition || another_contextual_condition)? "1" : "0")
}
))
.Distinct();
var sBuilder = new StringBuilder();
filtered
.ToList()
.ForEach(f =>
{
sBuilder.AppendLine(f);
});
file.Write(sBuilder.ToString());
}
}
}
There are about 3500 files as input, totaling 340 GBs. After processing about 400 files and about 200 write operations, nothing gets written anymore to the output file.
I've been trying writing line by line, by using StringBuilder as static class property or as a locally scoped variable in the ProcessFile method.
Attached image with running console application. You may notice that the output file size stopped increasing ~ by the time when file 380 was being processed. Try... catch embedding all Main method content catches nothing.