These are my two simple static methods for converting the text of a CSV file to a List<List<string>>
and vice versa. Each method uses a row converter.
This code is meant to handle the usual variations of CSV files. You can define your own CSV separator, and the methods try to handle escaped (doubled) quote characters correctly. They also cope with quoted text that forms a single cell even when it contains the CSV separator or spans multiple lines, and they can ignore empty rows.
The last method is only for testing, so you can ignore it, or use it to test your own or someone else's solution :). For testing I used this tricky CSV with 2 rows on 4 lines:
0,a,""bc,d
"e, f",g,"this,is, o
ne ""lo
ng, cell""",h
This is the final code. For simplicity, I removed all try/catch blocks.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
/// <summary>
/// Converts CSV text to a list of rows (each row a list of cell strings) and back.
/// </summary>
/// <remarks>
/// The dialect is lenient: a doubled quotation character always stands for one literal
/// quotation character, even in a cell that is not wrapped in quotes. As a consequence
/// a cell written as two bare quotation characters is read as a single literal quote,
/// not as an empty quoted cell.
/// </remarks>
public static class Csv
{
    /// <summary>Characters that end a row when they occur outside a quoted cell.</summary>
    private static readonly char[] LineBreakChars = { '\r', '\n' };

    /// <summary>
    /// Serializes all rows to CSV text, joining the rows with <see cref="Environment.NewLine"/>.
    /// </summary>
    /// <param name="csv">Rows to serialize; each row is a list of cell values.</param>
    /// <param name="separator">Text placed between the cells of a row.</param>
    /// <param name="quotation">Character used to wrap and escape cells.</param>
    /// <param name="returnFirstRow">Not read by this method; kept so existing calls still compile.</param>
    /// <returns>The CSV text; an empty string when <paramref name="csv"/> has no rows.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="csv"/> is null.</exception>
    public static string FromListToString(List<List<string>> csv, string separator = ",", char quotation = '"', bool returnFirstRow = true)
    {
        if (csv == null) { throw new ArgumentNullException(nameof(csv)); }

        // string.Join builds the result in one pass instead of re-copying it for every row.
        return string.Join(Environment.NewLine, csv.Select(row => RowFromListToString(row, separator, quotation)));
    }

    /// <summary>
    /// Parses CSV text into rows of cells.
    /// </summary>
    /// <remarks>
    /// Outside quoted cells "\r\n", "\n" and "\r" each end a row, whatever the current platform is.
    /// Inside a quoted cell line breaks are kept exactly as they appear in <paramref name="content"/>.
    /// A last row whose quoted cell is never closed is still returned instead of being dropped.
    /// </remarks>
    /// <param name="content">The CSV text.</param>
    /// <param name="separator">Text that separates the cells of a row.</param>
    /// <param name="quotation">Character that wraps cells and, when doubled, stands for itself.</param>
    /// <param name="returnFirstRow">When false, the first row that was kept (the header) is removed from the result.</param>
    /// <param name="ignoreEmptyRows">When true, rows in which every cell is empty are skipped.</param>
    /// <returns>The parsed rows; possibly an empty list.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
    public static List<List<string>> FromStringToList(string content, string separator = ",", char quotation = '"', bool returnFirstRow = true, bool ignoreEmptyRows = true)
    {
        if (content == null) { throw new ArgumentNullException(nameof(content)); }

        var csv = new List<List<string>>();
        // Chosen once for the whole text so that every row does not rescan the content.
        char tempQuote = FindUnusedChar(content);
        int position = 0;
        bool moreLines;
        do
        {
            List<string> row = ParseCells(content, ref position, separator, quotation, tempQuote, true, out _, out moreLines);
            if (!ignoreEmptyRows || row.Any(cell => cell.Length > 0)) { csv.Add(row); }
        }
        while (moreLines);

        // The count guard avoids an exception when every row was skipped as empty.
        if (!returnFirstRow && csv.Count > 0) { csv.RemoveAt(0); } // remove header
        return csv;
    }

    /// <summary>
    /// Serializes one row. Quotation characters inside a cell are doubled; a cell is wrapped in
    /// <paramref name="quotation"/> when it contains the separator or a line break.
    /// </summary>
    /// <param name="csvData">Cell values of the row; a null cell is written as an empty cell.</param>
    /// <param name="separator">Text placed between cells.</param>
    /// <param name="quotation">Character used to wrap and escape cells.</param>
    /// <returns>The row as CSV text, without a trailing line break.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="csvData"/> is null.</exception>
    public static string RowFromListToString(List<string> csvData, string separator = ",", char quotation = '"')
    {
        if (csvData == null) { throw new ArgumentNullException(nameof(csvData)); }

        string quote = quotation.ToString();
        IEnumerable<string> cells = csvData.Select(element =>
        {
            string cell = (element ?? "").Replace(quote, quote + quote);
            // Any '\r' or '\n' forces quoting because the reader ends a row on either of them.
            bool needsQuoting = cell.Contains(separator) || cell.IndexOfAny(LineBreakChars) >= 0;
            // Wrap with the configured quotation character, not a hard-coded double quote.
            return needsQuoting ? quote + cell + quote : cell;
        });
        return string.Join(separator, cells);
    }

    /// <summary>
    /// Parses a single line of CSV text into cells. Line breaks inside <paramref name="csvRow"/>
    /// are treated as ordinary cell content.
    /// </summary>
    /// <param name="csvRow">The line to parse.</param>
    /// <param name="continueWithRow">
    /// Cells returned by a previous call that reported an unclosed row. Its last cell is continued
    /// by <paramref name="csvRow"/>, joined with <see cref="Environment.NewLine"/>. The list itself
    /// is not modified.
    /// </param>
    /// <param name="separator">Text that separates cells.</param>
    /// <param name="quotation">Character that wraps cells and, when doubled, stands for itself.</param>
    /// <returns>
    /// The cells parsed so far, and a flag that is false when the line ends inside a quoted cell,
    /// meaning the next line belongs to the same row.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="csvRow"/> is null.</exception>
    public static (List<string>, bool) RowFromStringToList(string csvRow, List<string> continueWithRow = null, string separator = ",", char quotation = '"')
    {
        if (csvRow == null) { throw new ArgumentNullException(nameof(csvRow)); }

        bool continuing = continueWithRow != null && continueWithRow.Count > 0;
        string text = csvRow;
        if (continuing)
        {
            // Quotes in the previous result are already unescaped, so re-open the cell with a
            // quotation character and double its quotes again before appending the new line.
            string quote = quotation.ToString();
            string unfinishedCell = continueWithRow[continueWithRow.Count - 1];
            text = quote + unfinishedCell.Replace(quote, quote + quote) + Environment.NewLine + csvRow;
        }

        int position = 0;
        List<string> cells = ParseCells(text, ref position, separator, quotation, FindUnusedChar(text), false, out bool rowClosed, out _);
        if (continuing)
        {
            // Put the already finished cells back in front of the continued one.
            cells.InsertRange(0, continueWithRow.Take(continueWithRow.Count - 1));
        }
        return (cells, rowClosed);
    }

    /// <summary>
    /// Self-check: parses the sample CSV, writes it back, and verifies two regression cases
    /// (leading empty cell, quoted cell that starts with a quotation character).
    /// </summary>
    /// <returns>True when every check passes.</returns>
    public static bool Test()
    {
        string csvText = "0,a,\"\"bc,d" + Environment.NewLine + "\"e, f\",g,\"this,is, o" + Environment.NewLine + "ne \"\"lo" + Environment.NewLine + "ng, cell\"\"\",h";
        List<List<string>> csvList = new List<List<string>>() { new List<string>() { "0", "a", "\"bc", "d" }, new List<string>() { "e, f", "g", "this,is, o" + Environment.NewLine + "ne \"lo" + Environment.NewLine + "ng, cell\"", "h" } };

        List<List<string>> csvTextAsList = Csv.FromStringToList(csvText);
        bool parsedOk = csvTextAsList.Count == csvList.Count
            && Enumerable.SequenceEqual(csvList[0], csvTextAsList[0])
            && Enumerable.SequenceEqual(csvList[1], csvTextAsList[1]);

        string csvListAsText = Csv.FromListToString(csvList);

        List<string> tricky = new List<string>() { "", "\"a,b", "x" };
        (List<string> trickyParsed, bool trickyClosed) = Csv.RowFromStringToList(Csv.RowFromListToString(tricky));
        bool roundTripOk = trickyClosed && Enumerable.SequenceEqual(tricky, trickyParsed);

        return parsedOk && csvListAsText == csvText && roundTripOk;
    }

    /// <summary>
    /// Returns the first character, starting at code 162, that does not occur in
    /// <paramref name="text"/>. It is used as a temporary stand-in for literal quotation characters.
    /// </summary>
    private static char FindUnusedChar(string text)
    {
        char tempQuote = (char)162;
        while (text.IndexOf(tempQuote) >= 0) { tempQuote = (char)(tempQuote + 1); }
        return tempQuote;
    }

    /// <summary>
    /// Reads the cells of one row from <paramref name="text"/>, starting at <paramref name="position"/>.
    /// </summary>
    /// <param name="text">Text to scan.</param>
    /// <param name="position">Start index; on return, the index at which the next row starts.</param>
    /// <param name="separator">Cell separator; null or empty means the row is never split.</param>
    /// <param name="quotation">Quotation character.</param>
    /// <param name="tempQuote">A character that does not occur in <paramref name="text"/>.</param>
    /// <param name="stopAtLineBreak">When true, a line break outside quotes ends the row.</param>
    /// <param name="rowClosed">False when the scan ended inside a quoted cell.</param>
    /// <param name="endedAtLineBreak">True when the row was ended by a line break rather than by the end of the text.</param>
    private static List<string> ParseCells(string text, ref int position, string separator, char quotation, char tempQuote, bool stopAtLineBreak, out bool rowClosed, out bool endedAtLineBreak)
    {
        var cells = new List<string>();
        var cell = new StringBuilder();
        bool hasSeparator = !string.IsNullOrEmpty(separator);
        bool inQuotes = false;
        endedAtLineBreak = false;
        int index = position;

        while (index < text.Length)
        {
            char current = text[index];
            if (current == quotation)
            {
                // A run of n quotation characters holds n / 2 literal quotes; an odd run also
                // opens or closes a quoted section.
                int runLength = 1;
                while (index + runLength < text.Length && text[index + runLength] == quotation) { runLength++; }
                bool togglesQuoting = runLength % 2 == 1;

                // Keep the delimiter on the outer side of the literals (first when opening, last
                // when closing) so that FinishCell can trim it without touching the literals.
                if (togglesQuoting && !inQuotes) { cell.Append(quotation); }
                cell.Append(tempQuote, runLength / 2);
                if (togglesQuoting && inQuotes) { cell.Append(quotation); }

                if (togglesQuoting) { inQuotes = !inQuotes; }
                index += runLength;
            }
            else if (!inQuotes && hasSeparator && string.CompareOrdinal(text, index, separator, 0, separator.Length) == 0)
            {
                // Every separator outside quotes ends a cell, including a leading empty one.
                cells.Add(FinishCell(cell, quotation, tempQuote));
                index += separator.Length;
            }
            else if (!inQuotes && stopAtLineBreak && (current == '\r' || current == '\n'))
            {
                bool isCrLf = current == '\r' && index + 1 < text.Length && text[index + 1] == '\n';
                index += isCrLf ? 2 : 1;
                endedAtLineBreak = true;
                break;
            }
            else
            {
                cell.Append(current);
                index++;
            }
        }

        cells.Add(FinishCell(cell, quotation, tempQuote));
        position = index;
        rowClosed = !inQuotes;
        return cells;
    }

    /// <summary>
    /// Turns the collected cell text into its final value: delimiter quotes at both ends are
    /// removed and the stand-in characters become literal quotation characters. Clears the buffer.
    /// </summary>
    private static string FinishCell(StringBuilder cell, char quotation, char tempQuote)
    {
        string value = cell.ToString().Trim(quotation).Replace(tempQuote, quotation);
        cell.Clear();
        return value;
    }
}
Usage examples:
// get List<List<string>> representation of csv
var csvFromText = Csv.FromStringToList(csvAsText);
// read a csv file that uses a custom separator (";") and quote ('"'); File lives in System.IO
// the two trailing arguments are returnFirstRow = false (drop the header row)
// and ignoreEmptyRows = false (keep empty rows)
var csvFile = File.ReadAllText(csvFileFullPath);
var csvFromFile = Csv.FromStringToList(csvFile, ";", '"', false, false);
// get text representation of csvData from List<List<string>>
var csvAsText = Csv.FromListToString(csvData);
Notes:
This: char tempQuote = (char)162;
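picks the character with code 162 ('¢'), a rarely used character just outside the 7-bit ASCII range. The code looks for this character, or for the next character after it that does NOT occur in the text, and uses it as a temporary placeholder (for example, in place of escaped quotes) while a row is parsed.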