1

I am a beginner in c# and I have a excel file with 30000 row and I want to read it and do some processes on each cell. I used this code for reading and after that I have a for loop on this excel file that is so slow (about 2 hours). can you help me about this problem? is there any way to read this file very fast?

            Excel.Application xlApp;
            Excel.Workbook xlWorkBook;
            Excel.Workbook xlWorkBookahan1;
            Excel.Workbook xlWorkBookahan2;
            Excel.Workbook xlWorkBookahangh;
            Excel.Worksheet xlWorkSheet;
            Excel.Worksheet xlWorkSheetahan1;
            Excel.Worksheet xlWorkSheetahan2;
            Excel.Worksheet xlWorkSheetahangh;
            Excel.Range range;

            string str;
            int rCnt;
            int cCnt;
            int rw = 0;
            int cl = 0;

            //
            Excel.Application xlahan1 = new Microsoft.Office.Interop.Excel.Application();
            Excel.Application xlahan2 = new Microsoft.Office.Interop.Excel.Application();
            Excel.Application xlahangh = new Microsoft.Office.Interop.Excel.Application();
            object misValue = System.Reflection.Missing.Value;


            if (xlahan1 == null)
            {
                MessageBox.Show("Excel is not properly installed!!");
                return;
            }
            xlWorkBookahan1 = xlahan1.Workbooks.Add(misValue);
            xlWorkBookahan2 = xlahan2.Workbooks.Add(misValue);
            xlWorkBookahangh = xlahangh.Workbooks.Add(misValue);
            xlWorkSheetahan1 = (Excel.Worksheet)xlWorkBookahan1.Worksheets.get_Item(1);
            xlWorkSheetahan2 = (Excel.Worksheet)xlWorkBookahan2.Worksheets.get_Item(1);
            xlWorkSheetahangh = (Excel.Worksheet)xlWorkBookahangh.Worksheets.get_Item(1);


            //

            xlApp = new Excel.Application();
            xlWorkBook = xlApp.Workbooks.Open(@"C:\Users\maedeh\Desktop\Base.xls", 0, true, 5, "", "", true, Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
            xlWorkSheet = (Excel.Worksheet)xlWorkBook.Worksheets.get_Item(1);
            range = xlWorkSheet.UsedRange;
            rw = range.Rows.Count;
            cl = range.Columns.Count;

            int ahanghtei = 0, ahan1 = 0, ahan2 = 0, fooladghatei = 0, foolad1 = 0, foolad2 = 0, ngarmghatei = 0, ngarm1 = 0, ngarm2 = 0, nsardghatei = 0, nsard1 = 0, nsard2 = 0, energyghatei = 0, energy1 = 0, energy2 = 0, hamlghatei = 0, haml1 = 0, haml2 = 0, tmarkazighatei = 0, tmarkazi1 = 0, tmarkazi2 = 0, tgmarkazighatei = 0, tgmarkazi1 = 0, tgmarkazi2 = 0, dfnetghatei = 0, dfnet1 = 0, dfnet2 = 0, mtnasoozghatei = 0, mtnasooz1 = 0, mtnasooz2 = 0, fooladsabaghatei = 0, fooladsaba1 = 0, fooladsaba2 = 0;

            for (rCnt = 2; rCnt <= rw; rCnt++)
            {
                if (range.Cells[rCnt, 25].value == 1)//
                {
                        if (((range.Cells[rCnt, 16]).value <= 1999 && (range.Cells[rCnt, 16]).value >= 1000))
                  {
                      if ((range.Cells[rCnt, 120]).value >= 1000 && (range.Cells[rCnt, 120]).value <= 1999)
                      {
                          ahanghtei++;
                          xlWorkSheetahangh.Cells[ahanghtei, 1] = range.Cells[rCnt, 1];
                          xlWorkSheetahangh.Cells[ahanghtei, 2] = range.Cells[rCnt, 2];
                          xlWorkSheetahangh.Cells[ahanghtei, 3] = range.Cells[rCnt, 3];
                      }
                      else
                      {
                          xlWorkSheet.Rows[rCnt].Interior.Color = System.Drawing.ColorTranslator.ToOle(System.Drawing.Color.Red);
                          ahan1++;
                          xlWorkSheetahan1.Cells[ahan1, 1] = range.Cells[rCnt, 1];
                          xlWorkSheetahan1.Cells[ahan1, 2] = range.Cells[rCnt, 2];
                          xlWorkSheetahan1.Cells[ahan1, 3] = range.Cells[rCnt, 3];
                      }
                  }
                  else
                  {
                      if ((range.Cells[rCnt, 120]).value >= 1000 && (range.Cells[rCnt, 120]).value <= 1999)
                      {
                          xlWorkSheet.Rows[rCnt].Interior.Color = System.Drawing.ColorTranslator.ToOle(System.Drawing.Color.Yellow);
                          //xlWorkSheet.Cells[rCnt, 1].Interior.Color = System.Drawing.ColorTranslator.ToOle(System.Drawing.Color.Yellow);
                          ahan2++;
                          xlWorkSheetahan2.Cells[ahan2, 1] = range.Cells[rCnt, 1];
                          xlWorkSheetahan2.Cells[ahan2, 2] = range.Cells[rCnt, 2];
                          xlWorkSheetahan2.Cells[ahan2, 3] = range.Cells[rCnt, 3];

                      }
                  }

xlWorkBookahan1.SaveAs("C:\\ahan1.xls", Excel.XlFileFormat.xlWorkbookNormal, misValue, misValue, misValue, misValue, Excel.XlSaveAsAccessMode.xlExclusive, misValue, misValue, misValue, misValue, misValue);
            xlWorkBookahan2.SaveAs("C:/Users/maedeh/Desktop/ahan2.xls");
            xlWorkBookahangh.SaveAs("C:/Users/maedeh/Desktop/ahangh.xls");
            xlWorkBookahan1.Close(true, misValue, misValue);
            xlWorkBookahan2.Close(true, misValue, misValue);
            xlWorkBookahangh.Close(true, misValue, misValue);
            xlahan1.Quit();
            xlahan2.Quit();
            xlahangh.Quit();

            Marshal.ReleaseComObject(xlWorkSheetahan1);
            Marshal.ReleaseComObject(xlWorkSheetahan2);
            Marshal.ReleaseComObject(xlWorkSheetahangh);
            Marshal.ReleaseComObject(xlWorkBookahan1);
            Marshal.ReleaseComObject(xlWorkBookahan2);
            Marshal.ReleaseComObject(xlWorkBookahangh);
            Marshal.ReleaseComObject(xlahan1);
            Marshal.ReleaseComObject(xlahan2);
            Marshal.ReleaseComObject(xlahangh);

            Marshal.ReleaseComObject(range);
            Marshal.ReleaseComObject(xlWorkSheet);

            //close and release
            xlWorkBook.Close();
            Marshal.ReleaseComObject(xlWorkBook);

            //quit and release
            xlApp.Quit();
            Marshal.ReleaseComObject(xlApp);
maedeh
  • 105
  • 2
  • 10
  • 2
    Where's the loop? – ashleedawg Dec 16 '17 at 12:13
  • 3
    if the for loop is the part that is slow and needs attention, please show it instead. If automation is too slow as a whole, look into oledb or epplus. – Cee McSharpface Dec 16 '17 at 12:13
  • 3
    Possible duplicate of [How to read an Excel spreadsheet in c# quickly](https://stackoverflow.com/questions/7613898/how-to-read-an-excel-spreadsheet-in-c-sharp-quickly) – cramopy Dec 16 '17 at 12:13
  • Excel interop will always be slower than directly processing the file. Take a look at the [ClosedXML](https://github.com/closedxml/closedxml) or [EPPlus](https://github.com/JanKallman/EPPlus) libraries. Both have nuget packages available and are relatively easy to use. ClosedXML has some decent documentation too. – SpruceMoose Dec 16 '17 at 13:20

2 Answers2

3

You haven't shared the Loop that you're asking about, but if the code sample is what's being repeated 30,000 times, then a big issue is that :

  • you're opening the same file 30,000 times.

  • you're closing that file zero times.

I'm surprised Windows isn't crashing. Did it even finish after the 2 hours?

A good place to start is by Googling each keyword that you don't thoroughly understand. Personally, I like MSDN but there are thousands of helpful sites & examples out there.

For example:

  • Do you know what new Excel.Application does? Let's find out. Click here, then go to the 1st search result.

  • Next, what does Workbooks.Open do? Click here.

I actually don't have much experience with C# Interop but the answers are readily available. Sounds like @Zameer's links could be what you need -- but rather than just copy/pasting code, make sure you understand each step of those examples too!

It can be overwhelming at first, sometimes not even knowing where to start looking, but there are countless resources out there.

Also please see:

Good luck!


Edit:

I hope I didn't sound deprecatory at all; it's sometimes hard to tell a person's experience level, and even regardless of that, I think the simple things are often the easiest to overlook (myself included!) I'm still thinking the object handling is at least part of the issue.

A troubleshooting method I'll use sometimes when I have memory or performance issues while working with multiple objects is to "simplify & summarize". I'll rename make a copy of all the code, delete everything but the objects, and rename them like "object subtype + number", to see kind of like this:

objectcounts

As I said I'm fairly new to C# but I know that in other languages, forgetting to close & release objects can cause serious issues, especially when looped over and over.

Also I can't say for sure but I suspect you'd be better off to re-use a single Excel.Application object, created before the loop and instead of burning resources with multiple create/open/close/release.

Another possibility, instead of going through every row & column in UsedRange, perhaps try the C equivalent of a For Each loop.

Dim c as Range
For Each c in xlWorkSheet.UsedRange
    c.Value = .....
    ...etc..
Next

Last thought, if it's still slow, stick some timers here & there to figure out exactly what's causing the performance problem. (Also check Windows Task Manager while it's running to keep an eye on resources.)

ashleedawg
  • 20,365
  • 9
  • 72
  • 105
  • I put the whole code. this code is very simple and only compare the value of some cells with special values.but the implementation is about 2 hours. – maedeh Dec 18 '17 at 04:33
1

you can use Open XML SDK for this purpose. it is different than interop method, also the Interop method of reading a workbook is normally slow.

I have used openxml sdk to create and read excel containing more than 10k rows on different sheets as well. this should serve your purpose.

Here are some links to get you started.

Read excel using OpenXML

Read excel as data table with OpenXML

Hope it helps

Zameer Fouzan
  • 666
  • 6
  • 15
  • thank you for your guide. I want to use this in a for loop as a above code. can I use this? – maedeh Dec 18 '17 at 05:47
  • when I use openXML, I received this error An unhandled exception of type 'System.IO.FileFormatException' occurred in WindowsBase.dll Additional information: File contains corrupted data. this error was occurred at first line that I want to open the file. – maedeh Dec 18 '17 at 12:11
  • Can you share the code snippet that you used with openXML. yes you can use OpenXML to read your excel, show us how you are trying to do so. Also please add your stacktrace of the exception you are facing. – Zameer Fouzan Dec 19 '17 at 17:05
  • dear Zameer, my file has.xls format and when I change to .xlsx the exception has solved. but I do not know how should I use openXMl with for loop to search the special column in excel sheet. I do not know that I need to use for loop for searching special column or not. I wrote a windows application form that received an excel file. after that it considered column 25 and 120 in each row and if the value of cell was between two values, it increase a counter. I do this with for loop but it was time consuming and I do not know openxml is suitable for this or not. please help me. – maedeh Dec 20 '17 at 17:41