-1

The user input is a CSV file containing data like the following:

SiteID,Format,Title,Category,Quantity,StartPrice,BuyItNowPrice,Duration,Description,PicURL
US ,FixedPriceItem,PLease Do Not Bid.  TISSOT STAINLESS STEEL Women's Quartz Watch 1,14111,1,,341,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid.  TISSOT STAINLESS STEEL Women's Quartz Watch 2,14111,1,,342,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid.  TISSOT STAINLESS STEEL Women's Quartz Watch 3,14111,1,,343,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid.  TISSOT STAINLESS STEEL Women's Quartz Watch 4,14111,1,,344,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"

This is exactly the data in the CSV file.

To read this data, I have tried this code:

/// <summary>
/// Loads a CSV file into a list of <see cref="TempBulkCSVItems"/>, producing one
/// item for every non-empty physical line that follows the header line.
/// </summary>
/// <remarks>
/// The file is consumed with <c>ReadLine</c>, so a quoted field that spans several
/// physical lines is handled as several separate records. Every double quote is
/// also removed from a line before it is split, so the splitter cannot tell a
/// comma inside a quoted field from a field separator.
/// </remarks>
/// <param name="fileName">Path of the CSV file to read.</param>
/// <returns>The items built from the data lines, in file order.</returns>
private static List<TempBulkCSVItems> ProcessCSV(string fileName)
    {
        var items = new List<TempBulkCSVItems>();
        var table = new DataTable();

        // Matches a comma that is followed by an even number of double quotes,
        // i.e. a comma that is not inside a quoted section of the same line.
        var commaOutsideQuotes = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

        StreamReader reader = new StreamReader(fileName);

        // The header line is only used to decide how many columns the table gets.
        string[] headerCells = commaOutsideQuotes.Split(reader.ReadLine());
        for (int i = 0; i < headerCells.Length; i++)
        {
            table.Columns.Add(new DataColumn());
        }

        // Walk the remaining physical lines until the reader is exhausted.
        for (string raw = reader.ReadLine(); raw != null; raw = reader.ReadLine())
        {
            // Strip every double quote from the line before splitting it.
            string cleaned = raw.Replace(",\"", ",").Replace("\"", "");
            if (cleaned.Length == 0)
            {
                continue;
            }

            // Round-trip through a DataRow so each column can be read by index.
            DataRow record = table.NewRow();
            record.ItemArray = commaOutsideQuotes.Split(cleaned);
            table.Rows.Add(record);

            items.Add(new TempBulkCSVItems
            {
                SiteID = record[0].ToString(),
                Format = record[1].ToString(),
                Title = record[2].ToString(),
                Category = record[3].ToString(),
                Quantity = record[4].ToString(),
                StartPrice = record[5].ToString(),
                BuyItNowPrice = record[6].ToString(),
                Duration = record[7].ToString(),
                Description = record[8].ToString(),
                PicURL = record[9].ToString()
            });
        }

        // Release the file handle.
        reader.Dispose();

        return items;
    }

The problem I found is that I am reading one row at a time with ReadLine(), but the Description and PicURL fields span multiple lines and contain multiple commas (,). That is why my code breaks. What is the solution in this case?

Abdur Rahim
  • 3,975
  • 14
  • 44
  • 83
  • 2
    The solution is to use an already-existing CSV file reader, eg see http://stackoverflow.com/questions/3507498/reading-csv-file – David Arno Nov 20 '15 at 13:14
  • DO NOT try to read CSV with regexes. Your best option is to use an existing reader. I've used this one in the past: http://www.codeproject.com/Articles/9258/A-Fast-CSV-Reader. – Antoine Nov 20 '15 at 13:16

2 Answers

0

You can read the values from a CSV file like this:

// TextFieldParser comes from Microsoft.VisualBasic.FileIO, so the project needs a
// reference to Microsoft.VisualBasic and a matching using directive.
// Its HasFieldsEnclosedInQuotes property is true by default, so it does not
// have to be set here for quoted fields to be recognised.
using (var csv = new TextFieldParser(@"c:\temp\test.csv"))
{
    csv.TextFieldType = FieldType.Delimited;
    csv.SetDelimiters(",");

    // Keep pulling records until the parser reports there is no data left.
    while (!csv.EndOfData)
    {
        //Processing row
        string[] cells = csv.ReadFields();
        for (int i = 0; i < cells.Length; i++)
        {
            string cell = cells[i];
            //TODO: Process field
        }
    }
}

See: Reading CSV files using C#

Community
  • 1
  • 1
Karl Gjertsen
  • 4,690
  • 8
  • 41
  • 64
  • 1
    Did you forget `HasFieldsEnclosedInQuotes`? Also looks like they might need a reference to VisualBasic and `using Microsoft.VisualBasic.FileIO;`. – crashmstr Nov 20 '15 at 13:26
0

Here's something you can use for doing this type of parsing.

/// <summary>
/// Splits a sequence of text lines into rows of delimited fields, joining
/// consecutive lines into one row when a field continues past a line end.
/// </summary>
/// <remarks>
/// A row continues onto the next line when the line ends inside a
/// <paramref name="beginEndEscape"/> section or ends with an unconsumed
/// <paramref name="singleEscape"/> character; a line break is inserted into the
/// field at that point. The <paramref name="beginEndEscape"/> characters are kept
/// in the returned field text, while a consumed <paramref name="singleEscape"/>
/// character is dropped. Rows are produced lazily as the input is enumerated.
/// </remarks>
/// <param name="lines">The input lines, without their line terminators.</param>
/// <param name="delimiter">Character that separates fields.</param>
/// <param name="singleEscape">
/// Character that makes the next character literal, or <c>null</c> for none.
/// </param>
/// <param name="beginEndEscape">
/// Character that opens and closes a section in which delimiters are literal,
/// or <c>null</c> for none.
/// </param>
/// <returns>One list of field values per logical row.</returns>
public static IEnumerable<IList<string>> ParseDelimitedLines(
    this IEnumerable<string> lines,
    char delimiter,
    char? singleEscape,
    char? beginEndEscape)
{
    var fields = new List<string>();
    var buffer = new StringBuilder();
    bool escapeNext = false;   // previous character was an active single escape
    bool quoted = false;       // currently between begin/end escape characters
    bool continuing = false;   // current row carries on from the previous line

    foreach (string text in lines)
    {
        // A single escape at the end of a line only escapes the line break.
        escapeNext = false;

        if (continuing)
        {
            buffer.AppendLine();
            continuing = false;
        }

        foreach (char ch in text)
        {
            bool literal = escapeNext;

            if (!literal && ch == beginEndEscape)
            {
                quoted = !quoted;
            }

            if (!literal && !quoted && ch == delimiter)
            {
                // Field boundary: store what was collected and start afresh.
                fields.Add(buffer.ToString());
                buffer.Clear();
                continue;
            }

            // An escape character is active only if it was not itself escaped.
            escapeNext = !literal && ch == singleEscape;
            if (!escapeNext)
            {
                buffer.Append(ch);
            }
        }

        continuing = quoted || escapeNext;
        if (continuing)
        {
            // Row is incomplete; the next line is appended to the same field.
            continue;
        }

        fields.Add(buffer.ToString());
        yield return fields;
        fields = new List<string>();
        buffer.Clear();
    }

    // Input ended in the middle of a row: hand back what was gathered so far.
    if (continuing)
    {
        fields.Add(buffer.ToString());
        yield return fields;
    }
}

With that the following

// Sample input covering each supported form: plain fields, a quoted field that
// contains the delimiter, backslash-escaped delimiter and quotes, a
// backslash-escaped line break, and a quoted field spanning two lines.
string text = @"This,is,simple,stuff
Now,""it,gets"",harder
But,wait\,there,\""is,more\""
And,this\
way,to,do,newline
And,""another
way"",fin";

// Split on both CRLF and LF. The line breaks inside a verbatim string literal
// are whatever the source file was saved with, which need not match
// Environment.NewLine on the machine running the code; splitting on
// Environment.NewLine alone could leave the whole text as a single line.
string[] lines = text.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None);

int r = 0;
foreach (var row in lines.ParseDelimitedLines(',', '\\', '"'))
{
    Console.WriteLine("Row " + ++r);
    int c = 0;
    foreach (var item in row)
    {
        Console.WriteLine("Column " + ++c + ": <<" + item + ">>");
    }
}

Will output

Row 1
Column 1: <<This>>
Column 2: <<is>>
Column 3: <<simple>>
Column 4: <<stuff>>
Row 2
Column 1: <<Now>>
Column 2: <<"it,gets">>
Column 3: <<harder>>
Row 3
Column 1: <<But>>
Column 2: <<wait,there>>
Column 3: <<"is>>
Column 4: <<more">>
Row 4
Column 1: <<And>>
Column 2: <<this
way>>
Column 3: <<to>>
Column 4: <<do>>
Column 5: <<newline>>
Row 5
Column 1: <<And>>
Column 2: <<"another
way">>
Column 3: <<fin>>

And you can use it like this

// Feed the file's lines to the parser. A row that continues over several lines
// (open quote or trailing escape) comes back as a single row, and the quote
// characters themselves remain part of the returned field text.
var rows = File.ReadLines("yourFile.txt").ParseDelimitedLines(',', '\\', '"');
foreach(var row in rows)
{
    // Each row is an IList<string>; columns are addressed by zero-based index.
    string column1 = row[0];
    ...
}
juharr
  • 31,741
  • 4
  • 58
  • 93