Using some of the ideas from @dbc, I have a couple of suggestions.
I noticed about a 30% increase in speed if I used Configuration.ReadingExceptionOccurred
to catch the exception.
// Benchmark script: writes 300,000 generated rows to a CSV file, then reads them
// back as Bar while collecting type-conversion failures through
// CsvConfiguration.ReadingExceptionOccurred, and dumps the elapsed read time in seconds.
// NOTE(review): `async Task Main()` is generally preferable to `async void`; the
// signature is left unchanged here so the script's entry point stays as published.
async void Main()
{
    // Generate the test data. Every fifth row gets a "$"-prefixed Amount so that
    // reading it into Bar.Amount (decimal) is expected to fail conversion.
    List<Foo> records = new List<Foo>();
    for (int i = 0; i < 300000; i++)
    {
        var amount = (i + 1).ToString();
        if (i % 5 == 0)
        {
            amount = $"${i + 1}";
        }
        records.Add(new Foo { Id = i + 1, Amount = amount });
    }

    using (var writer = new StreamWriter(@"C:\Temp\TypeConverterException.csv"))
    using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
    {
        csv.WriteRecords(records);
    }

    var errors = new List<string>();
    var config = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        // Per the CsvHelper documentation, returning false tells the reader to
        // ignore the exception and returning true lets it be thrown.
        ReadingExceptionOccurred = args =>
        {
            if (args.Exception is TypeConverterException conversionEx)
            {
                errors.Add(
                    $"Error on line {conversionEx.Context.Parser.RawRow} converting value '{conversionEx.Text}' to type '{conversionEx.MemberMapData.Type.Name}' for field '{conversionEx.MemberMapData.Names[0]}'"
                );
                return false;
            }

            // Anything other than a conversion failure is not handled here.
            return true;
        }
    };

    using (var reader = new StreamReader(@"C:\Temp\TypeConverterException.csv"))
    using (var csv = new CsvReader(reader, config))
    {
        // Stopwatch is monotonic and high-resolution, unlike DateTime.Now, which
        // makes it the appropriate tool for measuring elapsed time.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        csv.Read();
        csv.ReadHeader();

        var barRecords = new List<Bar>();
        while (await csv.ReadAsync())
        {
            var record = csv.GetRecord<Bar>();
            // Rows whose conversion error was ignored come back as null; skip them.
            if (record != null)
            {
                barRecords.Add(record);
            }
        }

        stopwatch.Stop();
        stopwatch.Elapsed.TotalSeconds.Dump();
    }
}
/// <summary>
/// Row shape used when generating the CSV file; <see cref="Amount"/> is kept as
/// text so that it can hold values such as "$1" that are not valid decimals.
/// </summary>
public class Foo
{
    /// <summary>Numeric identifier of the row.</summary>
    public int Id { get; set; }

    /// <summary>Amount as raw text, exactly as it is written to the file.</summary>
    public string Amount { get; set; }
}
/// <summary>
/// Target row shape for reading the CSV file, with <see cref="Amount"/> typed as
/// <see cref="decimal"/>.
/// </summary>
public class Bar
{
    /// <summary>Numeric identifier of the row.</summary>
    public int Id { get; set; }

    /// <summary>Amount as a decimal value.</summary>
    public decimal Amount { get; set; }
}
However, it was considerably faster to import the records into an intermediate class that declares
as string any property that could have conversion errors, and then manually call TryParse() on
those values before converting to the final class that I wanted.
// Benchmark script: writes 300,000 generated rows to a CSV file, reads them back
// as Foo (Amount kept as text), converts each row to Bar with decimal.TryParse,
// records the rows that fail to parse, and dumps the elapsed read time in seconds.
// NOTE(review): `async Task Main()` is generally preferable to `async void`; the
// signature is left unchanged here so the script's entry point stays as published.
async void Main()
{
    // Generate the test data. Every fifth row gets a "$"-prefixed Amount, which
    // does not parse as a plain decimal.
    List<Foo> records = new List<Foo>();
    for (int i = 0; i < 300000; i++)
    {
        var amount = (i + 1).ToString();
        if (i % 5 == 0)
        {
            amount = $"${i + 1}";
        }
        records.Add(new Foo { Id = i + 1, Amount = amount });
    }

    using (var writer = new StreamWriter(@"C:\Temp\TypeConverterException.csv"))
    using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
    {
        csv.WriteRecords(records);
    }

    var errors = new List<string>();
    using (var reader = new StreamReader(@"C:\Temp\TypeConverterException.csv"))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        // Stopwatch is monotonic and high-resolution, unlike DateTime.Now, which
        // makes it the appropriate tool for measuring elapsed time.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        var fooRecords = csv.GetRecordsAsync<Foo>();
        var barRecords = new List<Bar>();
        await foreach (var record in fooRecords)
        {
            // Parse with the invariant culture to match how the file was written;
            // the culture-less overload would use the current thread culture.
            // NumberStyles.Number is the style that overload applies by default.
            var canParse = decimal.TryParse(
                record.Amount,
                NumberStyles.Number,
                CultureInfo.InvariantCulture,
                out decimal barAmount);

            if (canParse)
            {
                barRecords.Add(new Bar { Id = record.Id, Amount = barAmount });
            }
            else
            {
                errors.Add($"Error converting value '{record.Amount}' to decimal for field Amount");
            }
        }

        stopwatch.Stop();
        stopwatch.Elapsed.TotalSeconds.Dump();
    }
}
/// <summary>
/// Intermediate row shape used both to write the CSV file and to read it back;
/// <see cref="Amount"/> stays as text so it can be parsed manually afterwards.
/// </summary>
public class Foo
{
    /// <summary>Numeric identifier of the row.</summary>
    public int Id { get; set; }

    /// <summary>Amount as raw text, not yet converted to a number.</summary>
    public string Amount { get; set; }
}
/// <summary>
/// Final row shape produced after the text amount has been parsed successfully,
/// with <see cref="Amount"/> typed as <see cref="decimal"/>.
/// </summary>
public class Bar
{
    /// <summary>Numeric identifier of the row.</summary>
    public int Id { get; set; }

    /// <summary>Amount as a decimal value.</summary>
    public decimal Amount { get; set; }
}