There are many ways to achieve this. For many real-world CSV applications it is helpful to read the CSV input into a typed in-memory store, and there are standard libraries that can assist with this, such as CsvHelper, as explained in this canonical post: Parsing CSV files in C#, with header
However, for this simple requirement we only need to parse the values for `Country`
from the master list, in this case the second CSV. We don't need to manage, validate or parse any of the other fields in the CSVs.
- Build a list of unique Country values from the second csv
- Iterate the first csv
- Get the `Country` value
- Check against the list of countries from the second csv
- Write to the third csv if the country was not found
You can test the following code on .NET Fiddle
NOTE: this code uses `StringWriter` and `StringReader`, as their interfaces are the same as the file readers and writers in the `System.IO` namespace, but they let us remove the complexity associated with file access for this simple requirement.
// Sample data: the CSV to validate. Its Country column is checked against the master list.
string inputcsv = @"Id,Field1,Field2,Country,Field3
1,one,two,Australia,three
2,one,two,New Zealand,three
3,one,two,Indonesia,three
4,one,two,China,three
5,one,two,Japan,three";
// Sample data: the master CSV whose Country column defines the set of known countries.
string masterCsv = @"Field1,Country,Field2
one,Indonesia,...
one,China,...
one,Japan,...";
string errorCsv = "";

// For all rows in inputcsv where the country value is not listed in the masterCsv,
// write the row to errorCsv.

bool csvHasHeader = true;
char delimiter = ',';
const string countryColumnName = "Country";

// Returns the zero-based position of columnName in headerLine (case-insensitive,
// surrounding double quotes ignored). Returns fallbackIndex when the column is absent,
// so a missing header never turns into an index of -1.
int FindColumnIndex(string headerLine, char separator, string columnName, int fallbackIndex)
{
    int index = Array.FindIndex(
        headerLine.Split(separator),
        cell => cell.Trim('"').Equals(columnName, StringComparison.OrdinalIgnoreCase));
    return index >= 0 ? index : fallbackIndex;
}

// Extracts the field at columnIndex from a data row, stripping surrounding double quotes.
// Returns false (and an empty value) when the row has no such column, instead of throwing.
// NOTE: like the rest of this sample, this is a naive split; quoted fields that contain
// the separator are not supported.
bool TryGetField(string row, char separator, int columnIndex, out string value)
{
    string[] fields = row.Split(separator);
    if (columnIndex < 0 || columnIndex >= fields.Length)
    {
        value = "";
        return false;
    }

    value = fields[columnIndex].Trim('"');
    return true;
}

// Step 1: Build the set of unique Country values from the master CSV.
// A HashSet de-duplicates on Add and gives constant-time lookups in step 2.
int countryIndexInMaster = 1; // used when there is no header, or the header has no Country column
HashSet<string> countries = new HashSet<string>();
using (var masterReader = new System.IO.StringReader(masterCsv))
{
    string line = null;
    if (csvHasHeader)
    {
        line = masterReader.ReadLine();
        // an example of how to find the column index from first principles
        if (line != null)
        {
            countryIndexInMaster = FindColumnIndex(line, delimiter, countryColumnName, countryIndexInMaster);
        }
    }

    while ((line = masterReader.ReadLine()) != null)
    {
        // Blank lines and rows that are too short carry no country; skip them.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        if (TryGetField(line, delimiter, countryIndexInMaster, out string country))
        {
            countries.Add(country);
        }
    }
}

// Step 2: Read the input CSV; if the country is not in the master set "countries",
// write the row to the errorCsv. The input header (if any) is copied to the output first.
int countryIndexInInput = 3; // used when there is no header, or the header has no Country column
csvHasHeader = true;
var outputStringBuilder = new System.Text.StringBuilder();
using (var outputWriter = new System.IO.StringWriter(outputStringBuilder))
using (var inputReader = new System.IO.StringReader(inputcsv))
{
    string line = null;
    if (csvHasHeader)
    {
        line = inputReader.ReadLine();
        if (line != null)
        {
            countryIndexInInput = FindColumnIndex(line, delimiter, countryColumnName, countryIndexInInput);
            outputWriter.WriteLine(line);
        }
    }

    while ((line = inputReader.ReadLine()) != null)
    {
        // Blank lines are not records; ignore them.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        // A row that is missing the Country column cannot be validated,
        // so it is reported as an error rather than crashing the run.
        bool isKnownCountry =
            TryGetField(line, delimiter, countryIndexInInput, out string country)
            && countries.Contains(country);
        if (!isKnownCountry)
        {
            outputWriter.WriteLine(line);
        }
    }

    outputWriter.Flush();
    errorCsv = outputWriter.ToString();
}

// dump output to the console
Console.WriteLine(errorCsv);