0

I have created a windows app that processes a set of excel files from a folder looking for some selective information from each of them. Later, it stores all those information into a new excel sheet. It's kind of a report generation tool. I am having a hard time in attempting to reduce the total Excecution time for this app. Currently, it takes around 45-50 secs overall to complete one cycle of successful execution. The code is similar to this:

P.S. I cannot share the actual code to due to restriction policies. But the code shown below is suffcient enough to provide some solution.

    #region master_tracking_sheet
    public string[] xlsfiles;
    public string[] personList = new string[35];
    public string[,] excelSheet_det = new string[35, 6];
    private void buttonBrowseFolder_Click(object sender, EventArgs e)
    {
        if (folder_browse_excelSheet.ShowDialog() == DialogResult.OK)
        {
            this.txt_DirPath.Text = folder_browse_excelSheet.SelectedPath;
            gotFile = true;
        }

    }
    private void btnCreateMasterSheet_Click(object sender, EventArgs e)
    {
        try
        {
            if (gotFile == false)
            {
                MessageBox.Show("Please select a folder first!");
            }
            else
            {
                var extensions = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
                {
                    ".xls",
                    ".xlsx",
                };
                userNotifier = false;
                var baseDir = txt_DirPath.Text;
                var file_count = Directory.EnumerateFiles(baseDir).Count(filename => extensions.Contains(Path.GetExtension(filename)));
                Excel.Application application = new Excel.Application();
                xlsfiles = Directory.GetFiles(folder_browse_excelSheet.SelectedPath, "*.xls");
                for (int j = 1; j <= file_count; j++) //Generating list of persons whose excelSheets are there
                {
                    csvFilePath = System.IO.Path.GetFileNameWithoutExtension(xlsfiles[j - 1]);
                    personList[j - 1] = csvFilePath.ToString();
                }
                bool check_proceed = checkpersonSheetExist(personList);//Check for any missing excelSheet file as per the given person list
                if (check_proceed == false)
                {
                    application.Quit();
                    killApplication(true);
                }
                else
                {
                    for (int i = 1; i <= file_count; i++)
                    {
                        Excel.Workbook temp_wb;
                        excelSheet_det[i - 1, 0] = personList[i - 1].ToString(); //Adding person name as first entry
                        string excelFileName = xlsfiles[i - 1];
                        excelFilePath = excelFileName; //reassigning the excelFilePath to make an entry in excelSheet Log.txt
                        findWordInFile("Non Working", excelFileName); //Calling the method to identify Non-working entry if any
                        //The above method sets flag value to 'y' if true else the flag value remains to 'n'
                        temp_wb = application.Workbooks.Open(excelFileName);
                        Excel.Worksheet worksheet;
                        worksheet = temp_wb.Sheets[1];
                        var cell_val = 0.0;
                        var cell_valA = 0.0;
                        if ((worksheet.Cells[5, 25] as Excel.Range).Value == null || (worksheet.Cells[5, 25] as Excel.Range).Value == 0)
                            cell_val = 0;
                        else
                            cell_val = (double)(worksheet.Cells[5, 25] as Excel.Range).Value;
                        if ((worksheet.Cells[4, 25] as Excel.Range).Value == null || (worksheet.Cells[4, 25] as Excel.Range).Value==0)
                            cell_valA = 0;
                        else
                            cell_valA = (double)(worksheet.Cells[4, 25] as Excel.Range).Value;
                        if (cell_val == 0)
                        {
                            excelSheet_det[i - 1, 1] = cell_valA.ToString(); //Adding Normal Hours
                        }
                        else
                        {
                            excelSheet_det[i - 1, 1] = cell_val.ToString(); //Adding Normal Hours
                        }
                        if (flag == 'y') //if non-working non billable entry found
                        {
                            var cell_val1 = 0.0;
                            if ((worksheet.Cells[14, 25] as Excel.Range).Value == null || (worksheet.Cells[14, 25] as Excel.Range).Value == 0.0)
                                cell_val1 = 0;
                            else
                                cell_val1 = (double)(worksheet.Cells[14, 25] as Excel.Range).Value;
                            excelSheet_det[i - 1, 3] = cell_val1.ToString();//Adding Non-working Hours
                            excelSheet_det[i - 1, 2] = "0"; //Setting working hours to zero
                        }
                        else //if non-working non billable entry NOT found
                        {
                            var cell_val2 = 0.0;
                            if ((worksheet.Cells[14, 25] as Excel.Range).Value == null || (worksheet.Cells[14, 25] as Excel.Range).Value == 0)
                                cell_val2 = 0;
                            else
                                cell_val2 = (double)(worksheet.Cells[14, 25] as Excel.Range).Value;
                            excelSheet_det[i - 1, 2] = cell_val2.ToString();//Adding working Hours
                            excelSheet_det[i - 1, 3] = "0"; //Setting non-working hours to zero
                        }
                        var cell_val3 = 0.0;
                        var cell_val3_1 = 0.0;
                        var cell_val3_2 = 0.0;
                        if ((worksheet.Cells[15, 25] as Excel.Range).Value == null || (worksheet.Cells[15, 25] as Excel.Range).Value == 0)
                            cell_val3 = 0;
                        else
                            cell_val3 = (double)(worksheet.Cells[15, 25] as Excel.Range).Value;
                        if ((worksheet.Cells[13, 25] as Excel.Range).Value == null || (worksheet.Cells[13, 25] as Excel.Range).Value == 0)
                            cell_val3_1 = 0;
                        else
                            cell_val3_1 = (double)(worksheet.Cells[13, 25] as Excel.Range).Value;
                        if ((worksheet.Cells[12, 25] as Excel.Range).Value == null || (worksheet.Cells[12, 25] as Excel.Range).Value == 0)
                            cell_val3_2 = 0;
                        else
                            cell_val3_2 = (double)(worksheet.Cells[12, 25] as Excel.Range).Value;
                        if (cell_val3 == 0 && cell_val3_1 == 0)
                        {
                            excelSheet_det[i - 1, 4] = cell_val3_2.ToString();
                        }
                        else if (cell_val3_1 == 0 && cell_val3_2 == 0)
                        {
                            excelSheet_det[i - 1, 4] = cell_val3.ToString();
                        }
                        else
                        {
                            excelSheet_det[i - 1, 4] = cell_val3_1.ToString();
                        }
                        var cell_val4 = DateTime.MinValue;
                        if ((worksheet.Cells[3, 24] as Excel.Range).Value == null)
                            cell_val4 = DateTime.MinValue;
                        else
                            cell_val4 = (worksheet.Cells[3, 24] as Excel.Range).Value;
                        excelSheet_det[i - 1, 5] = cell_val4.ToString();//copy last date of excelSheet entry
                        Marshal.ReleaseComObject(temp_wb);
                    }
                    application.Workbooks.Close();
                    application.Quit();
                }
                /*Writing the array contents into the excel master sheet*/
                string masterSheetPath = folder_browse_excelSheet.SelectedPath + "\\Master Spreadsheet\\" + "MasterSheet.xls";
                if (!Directory.Exists(folder_browse_excelSheet.SelectedPath +
                     "\\masterSheet"))
                {
                    Directory.CreateDirectory(folder_browse_excelSheet.SelectedPath + "\\Master Spreadsheet");
                }
                if (!File.Exists(masterSheetPath))
                {
                    FileStream fs = File.Create(masterSheetPath);
                    fs.Close();
                }
                Excel.Application excel = new Excel.Application();
                Excel.Workbook workBook = excel.Workbooks.Open(masterSheetPath);
                Excel.Worksheet sheet = workBook.ActiveSheet;
                int numRows = sheet.UsedRange.Rows.Count;
                //MessageBox.Show(numRows.ToString());
                //Check for any duplicate entries in the mastersheet
                string masterDupErrDisplay = string.Empty;
                if (numRows > 1)
                {
                    for (int i1 = 0; i1 < xlsfiles.Length; i1++)
                    {
                        for (int ch = 0; ch < numRows; ch++)
                        {
                            if (excelSheet_det[i1, 5] == (((sheet.Cells[ch + 1, 6] as Excel.Range).Value)).ToString() && excelSheet_det[i1, 0] == ((string)(sheet.Cells[ch + 1, 1] as Excel.Range).Value))
                            {
                                if (!masterDupErrDisplay.Contains(excelSheet_det[i1, 0].ToString()))
                                {
                                    masterDupErrDisplay += string.Join(Environment.NewLine, excelSheet_det[i1, 0].ToString());
                                    masterDupErrDisplay += string.Join(Environment.NewLine, Environment.NewLine);
                                }
                            }

                        }
                    }
                    if (masterDupErrDisplay != null)
                    {
                        if (MessageBox.Show("Duplicate entries have been found for these people " + Environment.NewLine + masterDupErrDisplay + Environment.NewLine + "Do you wish to Continue?", "Duplicate Mastersheet entry Error", MessageBoxButtons.YesNo) == DialogResult.No)
                        {
                            excel.Quit();
                            killApplication(true);
                        }
                    }
                }
                sheet.Cells[1, 1] = "Name";
                sheet.Cells[1, 2] = "Normal Hours";
                sheet.Cells[1, 3] = "Working Hours";
                sheet.Cells[1, 4] = "Not working Hours";
                sheet.Cells[1, 5] = "Total Hours";
                sheet.Cells[1, 6] = "excelSheet Stamp";
                for (int i = 0; i < xlsfiles.Length; i++)
                {
                    sheet.Cells[numRows + 1 + i, 1] = excelSheet_det[i, 0];
                    sheet.Cells[numRows + 1 + i, 2] = excelSheet_det[i, 1];
                    sheet.Cells[numRows + 1 + i, 3] = excelSheet_det[i, 2];
                    sheet.Cells[numRows + 1 + i, 4] = excelSheet_det[i, 3];
                    sheet.Cells[numRows + 1 + i, 5] = excelSheet_det[i, 4];
                    sheet.Cells[numRows + 1 + i, 6] = excelSheet_det[i, 5];
                }
                workBook.RefreshAll();
                excel.Calculate();
                workBook.Save();
                workBook.Close(true);
                excel.Workbooks.Close();
                excel.Quit();
                application.Quit();
                MessageBox.Show("Master track sheet is successfully updated!", "Process Successful");
            }
        }
        catch (Exception ex)
        {
            throw ex;
        }
    }
    #endregion master_tracking_sheet
    public bool checkpersonSheetExist(string[] assocName)
    {
        try
        {
            string[,] personListSupply = new string[35, 2];
            int counter = 0;
            string errorDisplay = string.Empty;
            string line;
            // Read the file and display it line by line.
            System.IO.StreamReader file = new System.IO.StreamReader(folder_browse_excelSheet.SelectedPath + "\\team_members.txt");
            while ((line = file.ReadLine()) != null)
            {
                personListSupply[counter, 0] = line.ToString();
                personListSupply[counter, 1] = "n";
                counter++;
            }
            file.Close();
            int c = 0;
            while (assocName[c] != null)
            {
                c++;
            }
            for (int i = 0; i < counter; i++)
            {
                for (int k = 0; k < c; k++)
                {
                    if (personListSupply[i, 0] == assocName[k])
                    {
                        personListSupply[i, 1] = "y";
                        continue;
                    }

                }
                if (personListSupply[i, 1] == "n")
                {
                    errorDisplay += string.Join(Environment.NewLine, personListSupply[i, 0].ToString());
                    errorDisplay += string.Join(Environment.NewLine, Environment.NewLine);

                }
            }
            if (MessageBox.Show("The excel files are missing for" + Environment.NewLine + errorDisplay + Environment.NewLine + "Do you wish to Continue?", "File Missing Error", MessageBoxButtons.YesNo) == DialogResult.Yes)
            {
                return true;
            }
            else
            {
                return false;
            }
        }
        catch (Exception ex)
        {
            throw ex;
        }

    }
Abhinav Vatsa
  • 59
  • 1
  • 10
  • 1
    What makes you think this can be done faster? Did you do any profiling to discover where the majority of time is spent? How large are the excel files? I honestly wonder if you would not be better off with VBA - you would at least "stay in Excel space" instead of making lots of expensive calls to the code that bridges Excel to your C#. – Floris Jan 15 '14 at 06:54
  • @Floris: I figured out that the majority of the time is spent in the innermost for loop where it retrieves values from the current excel file. The excel files are of ~80 KB in size. I am new to this concept of winforms. I haven't considered using VBA. And what exactly are you pointing to by saying: "you would at least "stay in Excel space" instead of making lots of expensive calls"? Could you please elaborate? – Abhinav Vatsa Jan 15 '14 at 07:01
  • possible duplicate of [Excel Interop - Efficiency and performance](http://stackoverflow.com/questions/356371/excel-interop-efficiency-and-performance) – Jeremy Thompson Jan 15 '14 at 07:03
  • You create at least two additional instances of Excel in your code - depending on your machine, that may be quite slow. It looks like a bit of regular excel coding (maybe just a pivot table) might be all you need - a bit hard to tell without picking your code apart line by line. But I am quite convinced that a C# solution is not the way to get data from one Excel spreadsheet into another. Pretend you can only use Excel to solve this - you will be glad you did. – Floris Jan 15 '14 at 07:10
  • @JeremyThompson: It is not exactly a complete duplicate situation. If you go through the code carefully,you'll notice that some modifications are needed in order to make this code run more much faster. I'm looking for such kind of modifications. – Abhinav Vatsa Jan 15 '14 at 07:11
  • Fair enough but I cant undo the close vote. – Jeremy Thompson Jan 15 '14 at 07:17
  • @JeremyThompson: I understand. You could've waited a bit more for my response to come up before hitting the close vote. Though, that's not such of a problem. – Abhinav Vatsa Jan 15 '14 at 07:20
  • @Floris : I haven't yet used the pivot table anywhere till date. Agree with you on the point that a C# solution is not a proper way to fetch data, but do we really have another better option in this case, where you have a winform which has to go through a set of excel files one by one and rerieve selective info and then store it back into some other spreadsheet? – Abhinav Vatsa Jan 15 '14 at 07:23

3 Answers3

2

I would go with some commercial or open source .NET Excel library like SpreadsheetLight which allows creating xls and xslx files directly without Excel application which is UI based.

You should also take a look at OpenXML.

HABJAN
  • 9,212
  • 3
  • 35
  • 59
  • I wish I could use some commercial library. But, I'm looking to make it possible without using any existing third party libraries. – Abhinav Vatsa Jan 15 '14 at 07:04
1

There are many things which you can do

1.you can improve your code quality (add logger statements and find out which function is taking time debug it and if possible improve it)

2.use less heavy data structures than "heavy but easy to use" data structures and functions. (that really improves performance)

3.remove unwanted code and save unrequired I/O operations (reading everytime from file, instead fetch it once and perform operations on it. this also improves performance little bit)

4.if possible use multithreading, I am not C# guy but if possible use threads. this will improve performance for sure with noticeable gain (if concurrent file operations are possible in your case and if you have high cpu power then try this)

Nachiket Kate
  • 8,473
  • 2
  • 27
  • 45
  • Thanks for your response. Well, this application will be used on different kinds of machines, so even if I go for multithreading, it may pose problems on machines with lower H/W configs. No matter it will definetly boost up my code's performance. Secondly, On every iteration, a new file is loaded. So, as far as I make it out, there are no such unwanted I/O ops. Will be glad if you could correct me on this. – Abhinav Vatsa Jan 15 '14 at 07:17
1

I can think of a couple of things to speed this up but the link I referred you to Excel Interop - Efficiency and performance is one of the best.

If you really want speed you could:

  • Use OpenXML to avoid the Interop overhead
  • Not declare two instances of Excel &/or multiple workbook variables
  • Turn off Excel updating the UI while processing

Edit: Try turning off the ScreenUpdating and set the Calculation to Manual (to avoid any subsequent Excel calculations - if there are any formula's referring to cells you're updating):

private static XlCalculation xlCalculation = XlCalculation.xlCalculationAutomatic;
static public void TurnOffApplicationSettings(Excel.Application xlApp)
{
    xlApp.ScreenUpdating = false;
    xlApp.DisplayAlerts = false;
    xlCalculation = xlApp.Calculation; //Record the current Calculation Mode
    xlApp.Calculation = XlCalculation.xlCalculationManual;
    xlApp.UserControl = false;
    xlApp.EnableEvents = false;
}

Then run your long operation and revert the settings back to true and the calculation back to Automatic (or more correctly what it was set to previously):

static public void TurnOnApplicationSettings(Excel.Application xlApp)
{
    xlApp.ScreenUpdating = true;
    xlApp.DisplayAlerts = true;
    xlApp.Calculation = xlCalculation;
    xlApp.UserControl = true;
    xlApp.EnableEvents = true;
}
Community
  • 1
  • 1
Jeremy Thompson
  • 61,933
  • 36
  • 195
  • 321
  • Indeed [Excel Interop - Efficiency and performance](http://stackoverflow.com/questions/356371/excel-interop-efficiency-and-performance) is a good solution. I'm currently implementing on my existing code. And, I'm not at a position to use some third-party libraries such as OpenXML or SpreadSheetLight due to restriction policies. Where are you exactly referring to with your point:"•Turn off Excel updating the UI while processing"? Where could I actually use it in my above code? – Abhinav Vatsa Jan 15 '14 at 07:28
  • 1
    Hopefully my **edit** made the fourth speed optimization clearer to understand and implement. Pop in a call to turn *Off* at the beginning of the subroutine and to turn them back *On* at the end of the subroutine. – Jeremy Thompson Jan 15 '14 at 07:38
  • Thanks! I got your point! – Abhinav Vatsa Jan 15 '14 at 07:42