I've got a project that has to do a bulk import to SQL from a CSV file. Creating the data columns has been a success, however, I'm running into a problem with the rows. A comma is used as the delimiter to separate the columns which work great in the column names, but not in the rows of data. Some data has a comma to split name and surnames. Together with this, only every second field(column) is enclosed in double quotes. Using all this is breaking the rows in many more columns than it should. I have suggested changing the delimiter to a semicolon which actually works great and everything works fine, except this is not accepted by the customer as they don't want to change anything.
This is what I've done:
private static DataTable ImportFordEmailList(string csvFilePath)
{
DataTable csvData = new DataTable();
DataTable dt = new DataTable();
dt.Columns.Add("ColumnName");
dt.Rows.Clear();
try
{
using (TextFieldParser csvReader = new TextFieldParser(csvFilePath))
{
// csvReader.TextFieldType = FieldType.Delimited;
csvReader.SetDelimiters(new string[] { "," });
csvReader.HasFieldsEnclosedInQuotes = false;
csvReader.TrimWhiteSpace = true;
string[] colFields = csvReader.ReadFields();
foreach (string column in colFields)
{
if (dt.Rows.Count > 0)
{
string newColumn = Regex.Replace(column, "[^A-Za-z0-9]", "");
string findColum = "ColumnName = '" + newColumn.Trim() + "'";
DataRow[] foundRows = dt.Select(findColum);
if (foundRows.Length == 0)
{
DataRow dr = dt.NewRow();
dr["ColumnName"] = newColumn.Trim();
dt.Rows.Add(dr);
}
else
{
DataRow dr = dt.NewRow();
dr["ColumnName"] = newColumn.Trim() + "1";
dt.Rows.Add(dr);
}
}
else
{
string newColumn = column.Replace("'", "");
newColumn = newColumn.Replace(" ", "");
string clean = Regex.Replace(newColumn, "[^A-Za-z0-9 ]", "");
DataRow dr = dt.NewRow();
dr["ColumnName"] = clean.Trim();
dt.Rows.Add(dr);
}
}
foreach (DataRow row in dt.Rows)
{
string colName = Regex.Replace(row["ColumnName"].ToString().Trim(), "/^[ A-Za-z0-9]*$/", "");
DataColumn datecolumn = new DataColumn(colName);
datecolumn.AllowDBNull = true;
csvData.Columns.Add(datecolumn);
}
while (!csvReader.EndOfData)
{
string[] fieldData = csvReader.ReadFields();
for (int i = 0; i < fieldData.Length; i++)
{
if (fieldData[i] == "")
{
fieldData[i] = null;
}
}
foreach (string s in fieldData)
{
s.Replace("\"","");
Regex.Replace(s, "/^[ A-Za-z0-9 '@.()]", "");
string a = s;
}
csvData.Rows.Add(fieldData);
}
}
}
catch (Exception ex)
{
}
return csvData;
}
This is an example of how the data looks like:
Is there a way that I can work around this and make this work?
----- EDIT, Add data sample as text --------
Name,Name,Email,Manager Level1,Level 1 manager's email,Manager Level2,Level 2 manager's email
Adams, D. (Deon) ,"Adams, Deon. (D) ",username@email.com,"Masete, Thabo (B.T.)",username@email.com,"Fraser, Mervyn (M.)",username@email.com
Akaramunkongwanit, S. (Sirapra) ,"Akaramunkongwanit, Sirapra (S.)",username@email.com> ,"Naraphirom, Suphajitphat (Pin.)",username@email.com,"Jeeradeepalung, Jirawat (Jee.)",username@email.com
Angel, L. (Dave) ,"Angel, Dave (L.) ",username@email.com,"Causton, Keith (K.H.) ",username@email.com,"White, Chris- Manf Eng (C.F.) ",username@email.com
Apairat, J. (Janjira),"Apairat, Janjira (J.) "username@email.com,"Choksiriwanna, Phatthar (Patsy.)",username@email.com,"Phusitpoykai, Rachawan (R.) ",username@email.com