2

The following code allows you to convert a file from an XLS to a CSV:

static void ConvertExcelToCsv(string excelFile, string csvOutputFile, int worksheetNumber = 1)
{
    //Checks if the two files required exist or not and then throws an exception.
    if (!File.Exists(excelFile)) throw new FileNotFoundException(excelFile);
    if (File.Exists(csvOutputFile))
    {
        File.Delete(csvOutputFile);
    }

    // connection string
    var cnnStr = String.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data Source={0};Extended Properties=\"Excel 8.0;IMEX=1;HDR=NO\"", excelFile);
    //Creates a new OleDbConnection with an argument of cnn
    var cnn = new OleDbConnection(cnnStr);

    //creates new datatable in memory to store the read excel spreadsheet
    var dt = new DataTable();
    try
    {
        //Opens the new connection called "cnn".
        cnn.Open();
        var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        if (schemaTable.Rows.Count < worksheetNumber) throw new ArgumentException("The worksheet number provided cannot be found in the spreadsheet");
        string worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"].ToString().Replace("'", "");
        Console.WriteLine("worksheet:" + worksheet);
        string sql = String.Format("select * from [{0}]", worksheet).ToString();
        //string sql = worksheet.ToString();
        Console.WriteLine("sql:" + sql);
        var da = new OleDbDataAdapter(sql, cnn);
        da.Fill(dt);
    }
    catch (OleDbException e)
    {
        throw new ArgumentException(e.Message, "Error during the conversion");

    }
    finally
    {
        // free resources
        cnn.Close();
    }

    // write out CSV data
    using (var wtr = new StreamWriter(csvOutputFile))
    {
        foreach (DataRow row in dt.Rows)
        {
            bool firstLine = true;
            foreach (DataColumn col in dt.Columns)
            {
                if (!firstLine)
                {
                    wtr.Write("~");
                }
                else
                {
                    firstLine = false;
                }
                var data = row[col.ColumnName].ToString().Replace("\"", "\"\"");
                // wtr.Write(String.Format("{0}", data));
                wtr.Write(data.ToString());
            }
            wtr.WriteLine();
        }
    }
}

Which converts the XLS to CSV file but when I have a cell of this type:

Cell of this type

Converts it this way:

Converts it his way

How do I change the code to have the same value as XLS?

est
  • 21
  • 3
  • Excel is TOO smart. It attempts to data and automatically guess the types. So it converted your integer into a floating point number. Do you get same results if you open the csv in notepad? If notepad is correct than issue is just importing the csv into excel is the issue and your code is fine. You can either change the way you are importing csv into excel or put double quotes around all the csv data so when you open the csv in excel it will treat data as text instead of a number. – jdweng Jul 09 '20 at 16:44
  • opening it with notepad the result is always this 6.28007e + 006. I need the csv file to import the data – est Jul 10 '20 at 06:56
  • Have you tried something like [this](https://stackoverflow.com/a/5034856/10216583)? –  Jul 10 '20 at 08:31
  • this function, but i need delimiter ~,how can I do? – est Jul 10 '20 at 08:42
  • The is an issue with excel that always existed with cells that are General Format. The Jet Engine (and ACE) guess at cell types when General Format is used and often guess wrong. Is column the first column on the worksheet? Try changing the column to integer in the worksheet and see what happens. – jdweng Jul 10 '20 at 09:40

1 Answers1

1

The suggested solution in my comment above will export an Excel Sheet as a CSV file using Excel Interop Objects.

Applying that on your case:

using System;
using System.IO;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...

public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1)
{
    Excel.Application xlApp = null;
    Excel.Workbook xlWorkBook = null;

    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        xlApp = new Excel.Application();
        xlWorkBook = xlApp.Workbooks.Open(excelFile, Type.Missing, true);
        var xlSheet = xlWorkBook.Worksheets[worksheetNumber] as Excel.Worksheet;

        if (xlSheet is null)
            throw new ArgumentException();

        xlSheet.SaveAs(csvOutputFile, Excel.XlFileFormat.xlCSV);
    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        xlWorkBook?.Close(false);
        xlApp?.Quit();

        if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
        if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);

        xlWorkBook = null;
        xlApp = null;

        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}

However the TextInfo.ListSeparator will be used as the delimiter. If you need to use a specific character like ~ as separator, using the same approach:

using System;
using System.IO;
using System.Linq;
using System.Globalization;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...

public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1,
    string delimiter = null)
{
    Excel.Application xlApp = null;
    Excel.Workbook xlWorkBook = null;

    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        xlApp = new Excel.Application();
        xlWorkBook = xlApp.Workbooks.Open(excelFile, Type.Missing, true);
        var xlSheet = xlWorkBook.Worksheets[worksheetNumber] as Excel.Worksheet;                

        if (xlSheet is null)
            throw new ArgumentException();

        if (delimiter is null)
            delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;

        var xlRange = xlSheet.UsedRange;

        using (var sw = new StreamWriter(csvOutputFile))
            //Use:
            //foreach (var r in xlRange.Rows.Cast<Excel.Range>().Skip(1))
            //If the first row is a header row and you want to skip it...
            foreach (Excel.Range row in xlRange.Rows)
                sw.WriteLine(string.Join(delimiter, row.Cells.Cast<Excel.Range>()
                    .Select(x => x.Value2)));

    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        xlWorkBook?.Close(false);
        xlApp?.Quit();

        if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
        if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);

        xlWorkBook = null;
        xlApp = null;

        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}

Note that, the more cells the more processing time. However, you will get the values as they are and regardless whether the Sheet has mixed-type columns or not.

Also, you might want to try the OleDb way for faster processing. Instead of filling a DataTable and looping over the columns and rows again to write the output lines, use the OleDbDataReader to get the values of each row from the Sheet, concatenate and separate them by the delimiter, and pass the string to the SteamWriter.WriteLine method:

using System;
using System.IO;
using System.Linq;
using System.Data.OleDb;
using System.Globalization;
//...

public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1,
    string delimiter = null)
{
    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        if (delimiter is null)
            delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;

        var cnnStr = $"Provider=Microsoft.Jet.OLEDB.4.0;Data Source={excelFile};" +
            $"Extended Properties='Excel 8.0;HDR=Mo;IMEX=1;'";

        using (var cnn = new OleDbConnection(cnnStr))
        {
            cnn.Open();

            var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);

            if (schemaTable.Rows.Count < worksheetNumber)
                throw new ArgumentException();

            var worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"]
                .ToString().Replace("'", "");

            using (var cmd = new OleDbCommand($"SELECT * FROM [{worksheet}]", cnn))
            using (var r = cmd.ExecuteReader())
            using (var sw = new StreamWriter(csvOutputFile))
                while (r.Read())
                {
                    var values = new object[r.FieldCount];
                    r.GetValues(values);
                    sw.WriteLine(string.Join(delimiter, values));
                }
        }
    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (OleDbException ex)
    {
        Console.WriteLine(ex.Message);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
}

The caller:

void TheCaller()
{
    var xlFile = "XlsFile.xls";
    var csvFile = Path.ChangeExtension(xlFile, "csv");
    var delimiter = "~";
    var sheetNumber = 1;

    Console.WriteLine("Exporting...");
    //ConvertExcelToCsv(xlFile, csvFile, sheetNumber); //To call the first code snippet.
    ConvertExcelToCsv(xlFile, csvFile, sheetNumber, delimiter);
    Console.WriteLine("Done...!");
}

Or you might need an asynchronous call especially for the first two options:

async void TheCaller()
{
    var xlFile = "XlsFile.xls";
    var csvFile = Path.ChangeExtension(xlFile, "csv");
    var delimiter = "~";
    var sheetNumber = 1;

    Console.WriteLine("Exporting...");
    await Task.Run(() => ConvertExcelToCsv(xlFile, csvFile, sheetNumber, delimiter));
    Console.WriteLine("Done...!");
}

Notes

1. To use Excel Interop Objets, add reference to the Microsoft.Office.Interop.Excel
2. You might want to check this out.