13

I've searched internet for this and couldn't really find a question like it. Everyone was looking for a way to import an individual sheet in the excel file but what I want is to import all the sheets in the file to DataTable's in DataSet without knowing the sheet names.

I've not done much things with Excel before. This a sample and partially working code I've found on the internet and it only parses the given sheet name:

public static DataSet Parse(string fileName, string workSheetName)
{
    string connectionString = string.Format("provider=Microsoft.Jet.OLEDB.4.0; data source={0};Extended Properties=Excel 8.0;", fileName);
    string query = string.Format("SELECT * FROM [{0}$]", workSheetName);

    DataSet data = new DataSet();
    using (OleDbConnection con = new OleDbConnection(connectionString))
    {
        con.Open();
        OleDbDataAdapter adapter = new OleDbDataAdapter(query, con);
        adapter.Fill(data);
    }

    return data;
}

In the code above, as you see, workSheetName should be passed in so the query can know where to look at to import. In my case, I want it to traverse all the sheets no matter what they are named like and import them to individual DataTable's of a DataSet.

So in essence, the final thing will be a DataSet in which each DataTable holds rows for each sheet in the imported file.

Tarik
  • 79,711
  • 83
  • 236
  • 349

5 Answers5

20

This is a code I came up with and it works perfect but I saw someone else already added an answer:

static DataSet Parse(string fileName)
{
    string connectionString = string.Format("provider=Microsoft.Jet.OLEDB.4.0; data source={0};Extended Properties=Excel 8.0;", fileName);


    DataSet data = new DataSet();

    foreach(var sheetName in GetExcelSheetNames(connectionString))
    {
        using (OleDbConnection con = new OleDbConnection(connectionString))
        {    
            var dataTable = new DataTable();
            string query = string.Format("SELECT * FROM [{0}]", sheetName);
            con.Open();
            OleDbDataAdapter adapter = new OleDbDataAdapter(query, con);
            adapter.Fill(dataTable);
            data.Tables.Add(dataTable);
        }
    }

    return data;
}

static string[] GetExcelSheetNames(string connectionString)
{
        OleDbConnection con = null;
        DataTable dt = null;
        con= new OleDbConnection(connectionString);
        con.Open();
        dt = con.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);

        if (dt == null)
        {
            return null;
        }

        String[] excelSheetNames = new String[dt.Rows.Count];
        int i = 0;

        foreach (DataRow row in dt.Rows)
        {
            excelSheetNames[i] = row["TABLE_NAME"].ToString();
            i++;
        }

        return excelSheetNames;
}
Tarik
  • 79,711
  • 83
  • 236
  • 349
17

Because I was bored:

 static void Main(string[] args)
 {
            string filename = @"c:\temp\myfile.xlsx";    
            System.Data.OleDb.OleDbConnection myConnection = new System.Data.OleDb.OleDbConnection( 
                        "Provider=Microsoft.ACE.OLEDB.12.0; " +
                         "data source='" + filename + "';" +
                            "Extended Properties=\"Excel 12.0;HDR=YES;IMEX=1\" ");
            myConnection.Open();
            DataTable mySheets = myConnection.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" });                
            DataSet ds = new DataSet();
            DataTable dt;

            for (int i = 0; i <= mySheets.Rows.Count; i++)
            {
                   dt =   makeDataTableFromSheetName(filename, mySheets.Rows[i]["TABLE_NAME"].ToString());
                   ds.Tables.Add(dt);
            }
 }

private static DataTable makeDataTableFromSheetName(string filename, string sheetName)
{      
    System.Data.OleDb.OleDbConnection myConnection = new System.Data.OleDb.OleDbConnection(
    "Provider=Microsoft.ACE.OLEDB.12.0; " +
    "data source='" + filename + "';" +
    "Extended Properties=\"Excel 12.0;HDR=YES;IMEX=1\" ");

    DataTable dtImport = new DataTable();
    System.Data.OleDb.OleDbDataAdapter myImportCommand = new System.Data.OleDb.OleDbDataAdapter("select * from [" + sheetName + "$]", myConnection);
    myImportCommand.Fill(dtImport);
    return dtImport;
}
MDMalik
  • 3,951
  • 2
  • 25
  • 39
Avitus
  • 15,640
  • 6
  • 43
  • 53
4

The function that was suggested by Avitus is correct but it has logica error, you must rewrite in :

DataTable dtImport = new DataTable();
using ( System.Data.OleDb.OleDbConnection myConnection = new System.Data.OleDb.OleDbConnection(
            "Provider=Microsoft.ACE.OLEDB.12.0; " +
             "data source='" + filename + "';" +
                "Extended Properties=\"Excel 12.0;HDR=YES;IMEX=1\" ")){


using ( System.Data.OleDb.OleDbDataAdapter myImportCommand = new System.Data.OleDb.OleDbDataAdapter("select * from [" + sheetName + "$]", myConnection))
myImportCommand.Fill(dtImport);
} return dtImport;

this is correct, otherwise you must dispose connection and dataadapter manually.

user951083
  • 59
  • 1
  • 6
0

This might not be the best and the fastest one, but its another way (Edit- added elimination of blank cells):

    public static DataSet ReadWorkbook(string excelFileName, bool useFirstRowAsColumnName = false)
    {
        var excel = new Microsoft.Office.Interop.Excel.Application();
        var workBook = excel.Workbooks.Open(excelFileName, 0, true, 5, "", "", true, Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);//MLHIDE
        try
        {
            System.Data.DataSet ds = new DataSet(excelFileName);
            foreach (var sheet0 in workBook.Worksheets)
            {
                var sheet = (Microsoft.Office.Interop.Excel.Worksheet)sheet0;
                try
                {
                    var dt = readSheet(sheet, useFirstRowAsColumnName);
                    if (dt != null)
                        ds.Tables.Add(dt);
                }
                finally
                {
                    releaseObject(sheet);
                }
            }
            return ds;
        }
        finally
        {
            workBook.Close(true, null, null);
            excel.Quit();

            releaseObject(workBook);
            releaseObject(excel);
        }
    }

    /// <summary>
    /// Returns null for empty sheets or if sheet is not found.
    /// </summary>
    public static DataTable ReadSheet(string excelFileName, string sheetName, bool useFirstRowAsColumnName = false)
    {
        var excel = new Microsoft.Office.Interop.Excel.Application();
        var workBook = excel.Workbooks.Open(excelFileName, 0, true, 5, "", "", true, Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);//MLHIDE
        try
        {
            foreach (var sheet0 in workBook.Worksheets)
            {
                var sheet = (Microsoft.Office.Interop.Excel.Worksheet)sheet0;
                try
                {
                    if (sheet.Name.Equals_Wildcard(sheetName))
                    {
                        var dt = readSheet(sheet, useFirstRowAsColumnName);
                        if (dt != null)
                            return dt;
                    }
                }
                finally
                {
                    releaseObject(sheet);
                }
            }
            return null;
        }
        finally
        {
            workBook.Close(true, null, null);
            excel.Quit();

            releaseObject(workBook);
            releaseObject(excel);
        }
    }

    /// <summary>
    /// Returns null for empty sheets
    /// </summary>
private static DataTable readSheet(Microsoft.Office.Interop.Excel.Worksheet sheet, bool useFirstRowAsColumnName = false)
        {
            using (Dece.Common.BeginChangeCurrentCultureBlock_EN_us())
            {
                var range = sheet.UsedRange;
                try
                {
                    object[,] values = (object[,])range.Value2;
                    int rowCount = values.GetLength(0);
                    int colCount = values.GetLength(1);
                    int rowCount0 = rowCount;
                    int colCount0 = colCount;
                    #region find row-col count
                    {
                        bool ok = false;
                        for (int row = rowCount; row > 0; row--)
                            if (!ok)
                                for (int col = colCount; col > 0; col--)
                                {
                                    var val = values[row, col];
                                    if ((val != null) && (!System.Convert.ToString(val).IsNullOrEmpty()))
                                    {
                                        rowCount = row;
                                        ok = true;
                                        break;
                                    }
                                }
                            else
                                break;
                    }
                    {
                        bool ok = false;
                        for (int col = colCount; col > 0; col--)
                            if (!ok)
                                for (int row = rowCount; row > 0; row--)
                                {
                                    var val = values[row, col];
                                    if ((val != null) && (!System.Convert.ToString(val).IsNullOrEmpty()))
                                    {
                                        colCount = col;
                                        ok = true;
                                        break;
                                    }
                                }
                            else
                                break;
                    }
                    #endregion
                    if ((rowCount > 0) && (colCount > 0))
                    {  
                        var dt = new DataTable(sheet.Name);
                        dt.BeginLoadData();
                        try
                        {
                            for (int col = 1; col <= colCount; col++)
                                dt.Columns.Add_RenameIfRequired(useFirstRowAsColumnName ? values[1, col].ToString_NullProof() : col.ToString());
                            var arr = new object[colCount];
                            for (int row = useFirstRowAsColumnName ? 1 : 0; row < rowCount; row++)
                            {
                                for (int col = 1; col <= colCount; col++)
                                    arr[col - 1] = values[row + 1, col];
                                dt.Rows.Add(arr);
                            }
                        }
                        finally
                        {
                            dt.EndLoadData();
                        }
                        return dt;                        
                    }
                    else
                        return null;
                }
                finally
                {
                    releaseObject(range);
                }
            }
        }

    private static void releaseObject(object obj)
    {
        try
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(obj);
            obj = null;
        }
        catch (Exception ex)
        {
            obj = null;
            throw new Exception("Unable to release the Object " + ex.ToString(), ex);//MLHIDE
        }
        finally
        {
            GC.Collect();
        }
    }
Koray
  • 1,768
  • 1
  • 27
  • 37
0

C#

A neat and tidy, minimal version that provides a dataset with tables named as per the sheets (without the trailing dollar):

    private static OleDbConnection GetConnection(string filename, bool openIt)
    {
        // if your data has no header row, change HDR=NO
        var c = new OleDbConnection($"Provider=Microsoft.ACE.OLEDB.12.0;Data Source='{filename}';Extended Properties=\"Excel 12.0;HDR=YES;IMEX=1\" ");
        if (openIt)
            c.Open();
        return c;
    }

    private static DataSet GetExcelFileAsDataSet(OleDbConnection conn)
    {
        var sheets = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, new[] { default, default, default, "TABLE" });
        var ds = new DataSet();
        foreach (DataRow r in sheets.Rows)
            ds.Tables.Add(GetExcelSheetAsDataTable(conn, r["TABLE_NAME"].ToString()));
        return ds;
    }

    private static DataTable GetExcelSheetAsDataTable(OleDbConnection conn, string sheetName)
    {
        using (var da = new OleDbDataAdapter($"select * from [{sheetName}]", conn))
        {
            var dt = new DataTable() { TableName = sheetName.TrimEnd('$') };
            da.Fill(dt);
            return dt;
        }
    }

Use it like:

DataSet ds;

using(c = GetConnection(@"C:\path\to\your\xl.xlsx", true)
    ds = GetExcelFileAsDataSet(c);

Or if you only want one table and you know all the exact sheet name you want (remember that they have a dollar on the end):

DataTable dt;

using(c = GetConnection(@"C:\path\to\your\xl.xlsx", true)
    dt = GetExcelSheetAsDataTable(c, "Sheet1$");

VB.NET

Bonus! Note: needs a modern version of VB that understands things like string interpolation, With etc

        Private Shared Function GetConnection(filename As String, openIt As Boolean) As OleDbConnection
        'if your data has no header row, change HDR=NO
        Dim c = New OleDbConnection($"Provider=Microsoft.ACE.OLEDB.12.0;Data Source='{filename}';Extended Properties=""Excel 12.0;HDR=YES;IMEX=1"" ")
        If openIt Then c.Open()

        Return c
    End Function

    Private Shared Function GetExcelFileAsDataSet(conn As OleDbConnection) As DataSet

        Dim sheets = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, {Nothing, Nothing, Nothing, "TABLE"})
        Dim ds = New DataSet()

        For Each r As DataRow In sheets.Rows
            ds.Tables.Add(GetExcelSheetAsDataTable(conn, r("TABLE_NAME").ToString()))
        Next

        Return ds

    End Function

    Private Shared Function GetExcelSheetAsDataTable(conn As OleDbConnection, sheetName As String) As DataTable
        Using da = New OleDbDataAdapter($"select * from [{sheetName}]", conn)
            Dim dt = New DataTable() With {.TableName = sheetName.TrimEnd("$"c)}
            da.Fill(dt)
            Return dt
        End Using
    End Function

Use it like:

    Dim ds As DataSet

    Using c = GetConnection("C:\path\to\your\xl.xlsx", True)
        ds = GetExcelFileAsDataSet(c)
    End Using 'closes connection

Or for a single sheet (Remember sheet names end in a dollar) whose name you know:

    Dim dt As DataTable

    Using c = GetConnection("C:\path\to\your\xl.xlsx", True)
        dt = GetExcelSheetAsDataTable(c, "Sheet1$")
    End Using 'closes connection
Caius Jard
  • 72,509
  • 5
  • 49
  • 80