As input, I have a set of excel files with several worksheets inside. I need to export a single csv file for each worksheet. Below is my code which works but it is very slow. It builds upon the solutions proposed in this previous post. Please consider that I have to run this on rather big .xlsx files (approx. 300Mb).
QUESTION: Is there any way to improve this?
void Main()
{
string folder = @"\\PATH_TO_FOLDER\";
var files = Directory.GetFiles(folder, "*.xlsx", SearchOption.TopDirectoryOnly);
foreach (string file in files)
{
ConvertToCsv(file, Directory.GetParent(file) + @"\\output\");
}
}
public static void ConvertToCsv(string file, string targetFolder)
{
FileInfo finfo = new FileInfo(file);
ExcelPackage package = new ExcelPackage(finfo);
// if targetFolder doesn't exist, create it
if (!Directory.Exists(targetFolder)) {
Directory.CreateDirectory(targetFolder);
}
var worksheets = package.Workbook.Worksheets;
int sheetcount = 0;
foreach (ExcelWorksheet worksheet in worksheets)
{
sheetcount++;
var maxColumnNumber = worksheet.Dimension.End.Column;
var currentRow = new List<string>(maxColumnNumber);
var totalRowCount = worksheet.Dimension.End.Row+1;
var currentRowNum = 1;
//No need for a memory buffer, writing directly to a file
//var memory = new MemoryStream();
string file_name = targetFolder + Path.GetFileNameWithoutExtension(file) + "_" + sheetcount + ".csv";
using (var writer = new StreamWriter(file_name, false, Encoding.UTF8))
{
//the rest of the code remains the same
for (int i = 1; i < totalRowCount; i++)
{
i.Dump();
// populate line with semi columns separators
string line = "";
for (int j = 1; j < worksheet.Dimension.End.Column+1; j++)
{
if (worksheet.Cells[i, j].Value != null)
{
string cell = worksheet.Cells[i, j].Value.ToString() + ";";
line += cell;
}
}
// write line
writer.WriteLine(line);
}
}
}
}