Give the following a try--it uses NuGet package DocumentFormat.OpenXml
The code is from Using OpenXmlReader. However, I modified it to add data to a DataTable. Since you're reading data from the same Excel file multiple times, it's faster to open the Excel file once using an instance of SpreadSheetDocument and dispose of it when finished. Since the instance of SpreedSheetDocument needs to be disposed of before your application exits, IDisposable
is used.
Where it says "ToDo", you'll need to replace the code that creates the DataTable columns with your own code to create the correct columns for your project.
I tested the code below with an Excel file containing approximately 15,000 rows. When reading 100 rows at a time, the first read took approximately 500 ms - 800 ms, whereas subsequent reads took approximately 100 ms - 400 ms.
Create a class (name: HelperOpenXml)
HelperOpenXml.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System.Data;
using System.Diagnostics;
namespace ExcelReadSpecifiedRowsUsingOpenXml
{
public class HelperOpenXml : IDisposable
{
public string Filename { get; private set; } = string.Empty;
public int RowCount { get; private set; } = 0;
private SpreadsheetDocument spreadsheetDocument = null;
private DataTable dt = null;
public HelperOpenXml(string filename)
{
this.Filename = filename;
}
public void Dispose()
{
if (spreadsheetDocument != null)
{
try
{
spreadsheetDocument.Dispose();
dt.Clear();
}
catch(Exception ex)
{
throw ex;
}
}
}
public DataTable GetRowsSax(int startRow, int endRow, bool firstRowIsHeader = false)
{
int startIndex = startRow;
int endIndex = endRow;
if (firstRowIsHeader)
{
//if first row is header, increment by 1
startIndex = startRow + 1;
endIndex = endRow + 1;
}
if (spreadsheetDocument == null)
{
//create new instance
spreadsheetDocument = SpreadsheetDocument.Open(Filename, false);
//create new instance
dt = new DataTable();
//ToDo: replace 'dt.Columns.Add(...)' below with your code to create the DataTable columns
//add columns to DataTable
dt.Columns.Add("A");
dt.Columns.Add("B");
dt.Columns.Add("C");
dt.Columns.Add("D");
dt.Columns.Add("E");
dt.Columns.Add("F");
dt.Columns.Add("G");
dt.Columns.Add("H");
dt.Columns.Add("I");
dt.Columns.Add("J");
dt.Columns.Add("K");
}
else
{
//remove existing data from DataTable
dt.Rows.Clear();
}
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
int numWorkSheetParts = 0;
foreach (WorksheetPart worksheetPart in workbookPart.WorksheetParts)
{
using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
{
int rowIndex = 0;
//use the reader to read the XML
while (reader.Read())
{
if (reader.ElementType == typeof(Row))
{
reader.ReadFirstChild();
List<string> cValues = new List<string>();
int colIndex = 0;
do
{
//only get data from desired rows
if ((rowIndex > 0 && rowIndex >= startIndex && rowIndex <= endIndex) ||
(rowIndex == 0 && !firstRowIsHeader && rowIndex >= startIndex && rowIndex <= endIndex))
{
if (reader.ElementType == typeof(Cell))
{
Cell c = (Cell)reader.LoadCurrentElement();
string cellRef = c.CellReference; //ex: A1, B1, ..., A2, B2
string cellValue = string.Empty;
//string/text data is stored in SharedString
if (c.DataType != null && c.DataType == CellValues.SharedString)
{
SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));
cellValue = ssi.Text.Text;
}
else
{
cellValue = c.CellValue.InnerText;
}
//Debug.WriteLine("{0}: {1} ", c.CellReference, cellValue);
//add value to List which is used to add a row to the DataTable
cValues.Add(cellValue);
}
}
colIndex += 1; //increment
} while (reader.ReadNextSibling());
if (cValues.Count > 0)
{
//if List contains data, use it to add row to DataTable
dt.Rows.Add(cValues.ToArray());
}
rowIndex += 1; //increment
if (rowIndex > endIndex)
{
break; //exit loop
}
}
}
}
numWorkSheetParts += 1; //increment
}
DisplayDataTableData(dt); //display data in DataTable
return dt;
}
private void DisplayDataTableData(DataTable dt)
{
foreach (DataColumn dc in dt.Columns)
{
Debug.WriteLine("colName: " + dc.ColumnName);
}
foreach (DataRow r in dt.Rows)
{
Debug.WriteLine(r[0].ToString() + " " + r[1].ToString());
}
}
}
}
Usage:
private string excelFilename = @"C:\Temp\Test.xlsx";
private HelperOpenXml helperOpenXml = null;
...
private void GetData(int startIndex, int endIndex, bool firstRowIsHeader)
{
helperOpenXml.GetRowsSax(startIndex, endIndex, firstRowIsHeader);
}
Note: Make sure to call Dispose()
(ex: helperOpenXml.Dispose();
) before your application exits.
Update:
OpenXML stores dates as the number of days since 01 Jan 1900. For dates prior to 01 Jan 1900, they are stored in SharedString. For more info see Reading a date from xlsx using open xml sdk
Here's a code snippet:
Cell c = (Cell)reader.LoadCurrentElement();
...
string cellValue = string.Empty
...
cellValue = c.CellValue.InnerText;
double dateCellValue = 0;
Double.TryParse(cellValue, out dateCellValue);
DateTime dt = DateTime.FromOADate(dateCellValue);
cellValue = dt.ToString("yyyy/MM/dd");